Compare commits

...

11 Commits

Author SHA1 Message Date
diegosouzapw df23162e9d chore(release): v3.3.5 - all changes in ONE commit
Build Electron Desktop App / Validate version (push) Failing after 31s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
Build Electron Desktop App / Publish to npm (push) Has been skipped
2026-03-30 17:35:51 -03:00
dependabot[bot] 2c12f18b44 deps: bump the production group with 8 updates
Bumps the production group with 8 updates:

| Package | From | To |
| --- | --- | --- |
| [@lobehub/icons](https://github.com/lobehub/lobe-icons) | `5.0.1` | `5.2.0` |
| [@modelcontextprotocol/sdk](https://github.com/modelcontextprotocol/typescript-sdk) | `1.27.1` | `1.29.0` |
| [@swc/helpers](https://github.com/swc-project/swc/tree/HEAD/packages/helpers) | `0.5.19` | `0.5.20` |
| [jose](https://github.com/panva/jose) | `6.2.1` | `6.2.2` |
| [next](https://github.com/vercel/next.js) | `16.1.7` | `16.2.1` |
| [recharts](https://github.com/recharts/recharts) | `3.8.0` | `3.8.1` |
| [undici](https://github.com/nodejs/undici) | `7.24.4` | `7.24.6` |
| [wreq-js](https://github.com/sqdshguy/wreq-js) | `2.2.0` | `2.2.2` |


Updates `@lobehub/icons` from 5.0.1 to 5.2.0
- [Release notes](https://github.com/lobehub/lobe-icons/releases)
- [Changelog](https://github.com/lobehub/lobe-icons/blob/master/CHANGELOG.md)
- [Commits](https://github.com/lobehub/lobe-icons/compare/v5.0.1...v5.2.0)

Updates `@modelcontextprotocol/sdk` from 1.27.1 to 1.29.0
- [Release notes](https://github.com/modelcontextprotocol/typescript-sdk/releases)
- [Commits](https://github.com/modelcontextprotocol/typescript-sdk/compare/v1.27.1...v1.29.0)

Updates `@swc/helpers` from 0.5.19 to 0.5.20
- [Release notes](https://github.com/swc-project/swc/releases)
- [Changelog](https://github.com/swc-project/swc/blob/main/CHANGELOG-CORE.md)
- [Commits](https://github.com/swc-project/swc/commits/HEAD/packages/helpers)

Updates `jose` from 6.2.1 to 6.2.2
- [Release notes](https://github.com/panva/jose/releases)
- [Changelog](https://github.com/panva/jose/blob/main/CHANGELOG.md)
- [Commits](https://github.com/panva/jose/compare/v6.2.1...v6.2.2)

Updates `next` from 16.1.7 to 16.2.1
- [Release notes](https://github.com/vercel/next.js/releases)
- [Changelog](https://github.com/vercel/next.js/blob/canary/release.js)
- [Commits](https://github.com/vercel/next.js/compare/v16.1.7...v16.2.1)

Updates `recharts` from 3.8.0 to 3.8.1
- [Release notes](https://github.com/recharts/recharts/releases)
- [Changelog](https://github.com/recharts/recharts/blob/main/CHANGELOG.md)
- [Commits](https://github.com/recharts/recharts/compare/v3.8.0...v3.8.1)

Updates `undici` from 7.24.4 to 7.24.6
- [Release notes](https://github.com/nodejs/undici/releases)
- [Commits](https://github.com/nodejs/undici/compare/v7.24.4...v7.24.6)

Updates `wreq-js` from 2.2.0 to 2.2.2
- [Release notes](https://github.com/sqdshguy/wreq-js/releases)
- [Commits](https://github.com/sqdshguy/wreq-js/compare/v2.2.0...v2.2.2)

---
updated-dependencies:
- dependency-name: "@lobehub/icons"
  dependency-version: 5.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: production
- dependency-name: "@modelcontextprotocol/sdk"
  dependency-version: 1.29.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: production
- dependency-name: "@swc/helpers"
  dependency-version: 0.5.20
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production
- dependency-name: jose
  dependency-version: 6.2.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production
- dependency-name: next
  dependency-version: 16.2.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: production
- dependency-name: recharts
  dependency-version: 3.8.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production
- dependency-name: undici
  dependency-version: 7.24.6
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production
- dependency-name: wreq-js
  dependency-version: 2.2.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-30 17:32:55 -03:00
dependabot[bot] eaeb28b4e1 deps: bump the development group with 7 updates
Bumps the development group with 7 updates:

| Package | From | To |
| --- | --- | --- |
| [@tailwindcss/postcss](https://github.com/tailwindlabs/tailwindcss/tree/HEAD/packages/@tailwindcss-postcss) | `4.2.1` | `4.2.2` |
| [@types/keytar](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/keytar) | `4.4.0` | `4.4.2` |
| [eslint-config-next](https://github.com/vercel/next.js/tree/HEAD/packages/eslint-config-next) | `16.1.6` | `16.2.1` |
| [tailwindcss](https://github.com/tailwindlabs/tailwindcss/tree/HEAD/packages/tailwindcss) | `4.2.1` | `4.2.2` |
| [typescript](https://github.com/microsoft/TypeScript) | `5.9.3` | `6.0.2` |
| [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) | `8.57.1` | `8.58.0` |
| [vitest](https://github.com/vitest-dev/vitest/tree/HEAD/packages/vitest) | `4.1.0` | `4.1.2` |


Updates `@tailwindcss/postcss` from 4.2.1 to 4.2.2
- [Release notes](https://github.com/tailwindlabs/tailwindcss/releases)
- [Changelog](https://github.com/tailwindlabs/tailwindcss/blob/main/CHANGELOG.md)
- [Commits](https://github.com/tailwindlabs/tailwindcss/commits/v4.2.2/packages/@tailwindcss-postcss)

Updates `@types/keytar` from 4.4.0 to 4.4.2
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/keytar)

Updates `eslint-config-next` from 16.1.6 to 16.2.1
- [Release notes](https://github.com/vercel/next.js/releases)
- [Changelog](https://github.com/vercel/next.js/blob/canary/release.js)
- [Commits](https://github.com/vercel/next.js/commits/v16.2.1/packages/eslint-config-next)

Updates `tailwindcss` from 4.2.1 to 4.2.2
- [Release notes](https://github.com/tailwindlabs/tailwindcss/releases)
- [Changelog](https://github.com/tailwindlabs/tailwindcss/blob/main/CHANGELOG.md)
- [Commits](https://github.com/tailwindlabs/tailwindcss/commits/v4.2.2/packages/tailwindcss)

Updates `typescript` from 5.9.3 to 6.0.2
- [Release notes](https://github.com/microsoft/TypeScript/releases)
- [Commits](https://github.com/microsoft/TypeScript/compare/v5.9.3...v6.0.2)

Updates `typescript-eslint` from 8.57.1 to 8.58.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.58.0/packages/typescript-eslint)

Updates `vitest` from 4.1.0 to 4.1.2
- [Release notes](https://github.com/vitest-dev/vitest/releases)
- [Commits](https://github.com/vitest-dev/vitest/commits/v4.1.2/packages/vitest)

---
updated-dependencies:
- dependency-name: "@tailwindcss/postcss"
  dependency-version: 4.2.2
  dependency-type: direct:development
  update-type: version-update:semver-patch
  dependency-group: development
- dependency-name: "@types/keytar"
  dependency-version: 4.4.2
  dependency-type: direct:development
  update-type: version-update:semver-patch
  dependency-group: development
- dependency-name: eslint-config-next
  dependency-version: 16.2.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: development
- dependency-name: tailwindcss
  dependency-version: 4.2.2
  dependency-type: direct:development
  update-type: version-update:semver-patch
  dependency-group: development
- dependency-name: typescript
  dependency-version: 6.0.2
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: development
- dependency-name: typescript-eslint
  dependency-version: 8.58.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: development
- dependency-name: vitest
  dependency-version: 4.1.2
  dependency-type: direct:development
  update-type: version-update:semver-patch
  dependency-group: development
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-30 17:32:51 -03:00
Chris Staley d5647eab33 fix: remove dead userDismissed ref after auto-open removal
The userDismissed ref was only read by the removed auto-open useEffect.
Remove the ref declaration and the three onClose assignments that set it.
2026-03-30 17:32:49 -03:00
Chris Staley 89eb8885b1 fix: remove unnecessary comment from previous commit 2026-03-30 17:32:49 -03:00
Chris Staley a5dc5687f8 fix: remove auto-opening OAuth/API key modal on provider detail page
Auto-opening the "Add Connection" dialog when navigating to a provider
with zero connections was a poor UX pattern. It surprised users who were
simply browsing provider details (e.g. after deleting a connection or
checking settings). The page already displays a clear empty state with
an "Add Connection" button — users should click it when ready.
2026-03-30 17:32:49 -03:00
oyi77 6780485051 feat(cache): persistent metrics, cache entry browser, settings UI, MCP tools, prefix analyzer
Implements remaining features from #813:

Phase 1 - Persistent Metrics:
- Add cache_metrics table for persistent hit/miss tracking
- Semantic cache stats now survive server restarts

Phase 2 - Cache Entry Browser:
- /api/cache/entries endpoint with search, pagination, delete
- CacheEntriesTab component for browsing cached entries

Phase 3 - Settings UI:
- CacheSettingsTab for semantic/prompt cache configuration
- /api/settings/cache-config endpoint

Phase 4 - Prefix Analyzer:
- src/lib/promptCache/prefixAnalyzer.ts for intelligent caching
- Analyzes message arrays to find stable prefixes

Phase 5 - Provider Support:
- Added deepseek to CACHING_PROVIDERS

Phase 6 - MCP Tools:
- omniroute_cache_stats tool
- omniroute_cache_flush tool

Phase 7 - Retention:
- cleanOldMetrics() for auto-purge of old entries

Closes #813
2026-03-30 17:32:45 -03:00
oyi77 d043e7a242 feat(cache): fix cache page to display prompt cache metrics and trend data
Closes #813
2026-03-30 17:32:45 -03:00
Chris Staley c5d9b5f51d fix: apply PR review feedback for Gemini CLI quota
- Add early return guard for missing accessToken in getGeminiUsage
- Add 10s fetch timeout (AbortSignal.timeout) on retrieveUserQuota calls
- Clamp used value with Math.max(0, ...) for non-negative display
- Use full accessToken as cache key instead of truncated prefix
- Replace catch(err: any) with instanceof Error check in models route
2026-03-30 17:32:42 -03:00
Chris Staley 35e2892b98 feat: add real Gemini CLI quota tracking via retrieveUserQuota API
Replace stub getGeminiUsage() with per-model quota fetching from Google
Cloud Code Assist's retrieveUserQuota endpoint (same API the official
Gemini CLI /stats command uses). Fixes OAuth env var name, aligns model
list with official Gemini CLI VALID_GEMINI_MODELS, and makes "Import
from /models" discover new models via the quota endpoint.
2026-03-30 17:32:42 -03:00
diegosouzapw 11dfdbb7a3 feat(analytics): add diversity score card UI and diversity API route
Implement DiversityScoreCard component to fetch and display provider diversity score with loading state and conditional styling, integrate it into AnalyticsPage overview, and add a new API route at src/app/api/analytics/diversity/route.ts to return the diversity report using getDiversityReport
2026-03-30 16:37:49 -03:00
29 changed files with 1657 additions and 277 deletions
+16
View File
@@ -3,6 +3,22 @@
## [Unreleased]
---
## [3.3.5] - 2026-03-30
### ✨ New Features
- **Gemini Quota Tracking:** Added real-time Gemini CLI quota tracking via the `retrieveUserQuota` API (PR #825)
- **Cache Dashboard:** Enhanced the Cache Dashboard to display prompt cache metrics, 24h trends, and estimated cost savings (PR #824)
### 🐛 Bug Fixes
- **Token Accounting:** Prompt cache tokens are now included in historical usage input calculations so that quota deductions are correct (PR #822)
- **User Experience:** Removed the intrusive auto-opening OAuth/API key modal on provider detail pages that have no connections (PR #820)
- **Dependency Updates:** Bumped and locked down dependencies for development and production trees including Next.js 16.2.1, Recharts, and TailwindCSS 4.2.2 (PR #826, #827)
---
## [3.3.4] - 2026-03-30
### ✨ New Features
+1 -1
View File
@@ -1,7 +1,7 @@
openapi: 3.1.0
info:
title: OmniRoute API
version: 3.3.4
version: 3.3.5
description: |
OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
endpoint that routes requests to multiple AI providers with load balancing,
+4 -9
View File
@@ -226,23 +226,18 @@ export const REGISTRY: Record<string, RegistryEntry> = {
oauth: {
clientIdEnv: "GEMINI_CLI_OAUTH_CLIENT_ID",
clientIdDefault: "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com",
clientSecretEnv: "GEMINI_CLI_OAUTH_CLIENT_SECRET",
clientSecretEnv: "GEMINI_OAUTH_CLIENT_SECRET",
clientSecretDefault: "",
},
models: [
{ id: "gemini-3.1-pro-high", name: "Gemini 3.1 Pro High" },
{ id: "gemini-3.1-pro-low", name: "Gemini 3.1 Pro Low" },
{ id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" },
{ id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" },
{ id: "gemini-3-pro-preview", name: "Gemini 3 Pro Preview" },
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
{ id: "gemini-3.1-pro-preview-customtools", name: "Gemini 3.1 Pro Preview Custom Tools" },
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
{ id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
{ id: "gemini-2.0-flash", name: "Gemini 2.0 Flash" },
{ id: "gemini-1.5-pro", name: "Gemini 1.5 Pro" },
{ id: "gemini-1.5-flash", name: "Gemini 1.5 Flash" },
],
},
+6
View File
@@ -60,6 +60,12 @@ export {
getSessionSnapshotInput,
getSessionSnapshotOutput,
getSessionSnapshotTool,
cacheStatsInput,
cacheStatsOutput,
cacheStatsTool,
cacheFlushInput,
cacheFlushOutput,
cacheFlushTool,
} from "./tools.ts";
// A2A schemas
+65 -2
View File
@@ -806,11 +806,73 @@ export const syncPricingTool: McpToolDefinition<typeof syncPricingInput, typeof
sourceEndpoints: ["/api/pricing/sync"],
};
// ============ Cache Tools ============
/** Input schema for `omniroute_cache_stats`: an empty object, because the tool takes no parameters. */
export const cacheStatsInput = z.object({}).describe("No parameters required");
/**
 * Output schema for `omniroute_cache_stats`, with one section per cache layer:
 * `semanticCache`, `promptCache` (nullable) and `idempotency`.
 */
export const cacheStatsOutput = z.object({
semanticCache: z.object({
memoryEntries: z.number(),
dbEntries: z.number(),
hits: z.number(),
misses: z.number(),
// Typed as a string rather than a number — presumably a pre-formatted percentage; confirm against /api/cache.
hitRate: z.string(),
tokensSaved: z.number(),
}),
// The whole promptCache section may be null (see `.nullable()` below).
promptCache: z
.object({
totalRequests: z.number(),
requestsWithCacheControl: z.number(),
totalCachedTokens: z.number(),
totalCacheCreationTokens: z.number(),
estimatedCostSaved: z.number(),
})
.nullable(),
idempotency: z.object({
activeKeys: z.number(),
windowMs: z.number(),
}),
});
/**
 * MCP tool definition for reading cache statistics.
 * Declared with the `read:cache` scope, "basic" audit level, phase 2, and `/api/cache` as its source endpoint.
 *
 * NOTE(review): the description mentions prompt cache metrics "by provider", but
 * `cacheStatsOutput.promptCache` only declares aggregate totals — confirm which is intended.
 */
export const cacheStatsTool: McpToolDefinition<typeof cacheStatsInput, typeof cacheStatsOutput> = {
name: "omniroute_cache_stats",
description:
"Returns cache statistics including semantic cache hit rate, prompt cache metrics by provider, and idempotency layer stats.",
inputSchema: cacheStatsInput,
outputSchema: cacheStatsOutput,
scopes: ["read:cache"],
auditLevel: "basic",
phase: 2,
sourceEndpoints: ["/api/cache"],
};
/**
 * Input schema for `omniroute_cache_flush`. Both fields are optional; per the tool
 * description, omitting both clears all entries.
 * NOTE(review): behavior when both `signature` and `model` are supplied is not defined here — confirm in the handler.
 */
export const cacheFlushInput = z.object({
signature: z.string().optional().describe("Specific cache signature to invalidate"),
model: z.string().optional().describe("Invalidate all entries for a specific model"),
});
/** Output schema for `omniroute_cache_flush`: a required `ok` flag plus optional `invalidated` and `scope`. */
export const cacheFlushOutput = z.object({
ok: z.boolean(),
invalidated: z.number().optional(),
scope: z.string().optional(),
});
/**
 * MCP tool definition for flushing cache entries.
 * Declared with the `write:cache` scope, "full" audit level, phase 2, and `/api/cache` as its source endpoint.
 */
export const cacheFlushTool: McpToolDefinition<typeof cacheFlushInput, typeof cacheFlushOutput> = {
name: "omniroute_cache_flush",
description:
"Flush cache entries. Provide signature to invalidate a single entry, model to invalidate all entries for a model, or omit both to clear all.",
inputSchema: cacheFlushInput,
outputSchema: cacheFlushOutput,
scopes: ["write:cache"],
auditLevel: "full",
phase: 2,
sourceEndpoints: ["/api/cache"],
};
// ============ Tool Registry ============
/** All MCP tool definitions, ordered by phase then name */
export const MCP_TOOLS = [
// Phase 1: Essential
getHealthTool,
listCombosTool,
getComboMetricsTool,
@@ -819,7 +881,6 @@ export const MCP_TOOLS = [
routeRequestTool,
costReportTool,
listModelsCatalogTool,
// Phase 2: Advanced
simulateRouteTool,
setBudgetGuardTool,
setRoutingStrategyTool,
@@ -830,6 +891,8 @@ export const MCP_TOOLS = [
explainRouteTool,
getSessionSnapshotTool,
syncPricingTool,
cacheStatsTool,
cacheFlushTool,
] as const;
/** Essential tools only (Phase 1) */
+172 -27
View File
@@ -159,13 +159,13 @@ async function getGlmUsage(apiKey: string, providerSpecificData?: Record<string,
* @returns {Promise<unknown>} Usage data with quotas
*/
export async function getUsageForProvider(connection) {
const { provider, accessToken, apiKey, providerSpecificData } = connection;
const { provider, accessToken, apiKey, providerSpecificData, projectId } = connection;
switch (provider) {
case "github":
return await getGitHubUsage(accessToken, providerSpecificData);
case "gemini-cli":
return await getGeminiUsage(accessToken);
return await getGeminiUsage(accessToken, providerSpecificData, projectId);
case "antigravity":
return await getAntigravityUsage(accessToken, undefined);
case "claude":
@@ -195,24 +195,22 @@ function parseResetTime(resetValue) {
if (!resetValue) return null;
try {
// If it's already a Date object
let date;
if (resetValue instanceof Date) {
return resetValue.toISOString();
date = resetValue;
} else if (typeof resetValue === "number") {
date = new Date(resetValue);
} else if (typeof resetValue === "string") {
date = new Date(resetValue);
} else {
return null;
}
// If it's a number (Unix timestamp in milliseconds)
if (typeof resetValue === "number") {
return new Date(resetValue).toISOString();
}
// Epoch-zero (1970-01-01) means no scheduled reset — treat as null
if (date.getTime() <= 0) return null;
// If it's a string (ISO date or parseable date string)
if (typeof resetValue === "string") {
return new Date(resetValue).toISOString();
}
return null;
return date.toISOString();
} catch (error) {
console.warn(`Failed to parse reset time: ${resetValue}`, error);
return null;
}
}
@@ -417,36 +415,183 @@ function inferGitHubPlanName(data: JsonRecord, premiumQuota: UsageQuota | null):
return "GitHub Copilot";
}
// ── Gemini CLI subscription info cache ──────────────────────────────────────
// Prevents duplicate loadCodeAssist calls within the same quota cycle.
// Key: accessToken → { data, fetchedAt }
const _geminiCliSubCache = new Map();
const GEMINI_CLI_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
/**
* Gemini CLI Usage (Google Cloud)
* Gemini CLI Usage fetch per-model quota from Cloud Code Assist API.
* Gemini CLI and Antigravity share the same upstream (cloudcode-pa.googleapis.com),
* so this follows the same pattern as getAntigravityUsage().
*/
async function getGeminiUsage(accessToken) {
async function getGeminiUsage(accessToken, providerSpecificData?, connectionProjectId?) {
if (!accessToken) {
return { plan: "Free", message: "Gemini CLI access token not available." };
}
try {
// Gemini CLI uses Google Cloud quotas
// Try to get quota info from Cloud Resource Manager
const subscriptionInfo = await getGeminiCliSubscriptionInfoCached(accessToken);
const projectId =
connectionProjectId ||
providerSpecificData?.projectId ||
subscriptionInfo?.cloudaicompanionProject ||
null;
const plan = getGeminiCliPlanLabel(subscriptionInfo);
if (!projectId) {
return { plan, message: "Gemini CLI project ID not available." };
}
// Use retrieveUserQuota (same endpoint as Gemini CLI /stats command).
// Returns per-model buckets with remainingFraction and resetTime.
const response = await fetch(
"https://cloudresourcemanager.googleapis.com/v1/projects?filter=lifecycleState:ACTIVE",
"https://cloudcode-pa.googleapis.com/v1internal:retrieveUserQuota",
{
method: "POST",
headers: {
Authorization: `Bearer ${accessToken}`,
Accept: "application/json",
"Content-Type": "application/json",
},
body: JSON.stringify({ project: projectId }),
signal: AbortSignal.timeout(10000),
}
);
if (!response.ok) {
// Quota API may not be accessible, return generic message
return {
message: "Gemini CLI uses Google Cloud quotas. Check Google Cloud Console for details.",
};
return { plan, message: `Gemini CLI quota error (${response.status}).` };
}
return { message: "Gemini CLI connected. Usage tracked via Google Cloud Console." };
const data = await response.json();
const quotas: Record<string, UsageQuota> = {};
if (Array.isArray(data.buckets)) {
for (const bucket of data.buckets) {
if (!bucket.modelId || bucket.remainingFraction == null) continue;
const remainingFraction = toNumber(bucket.remainingFraction, 0);
const remainingPercentage = remainingFraction * 100;
const QUOTA_NORMALIZED_BASE = 1000;
const total = QUOTA_NORMALIZED_BASE;
const remaining = Math.round(total * remainingFraction);
const used = Math.max(0, total - remaining);
quotas[bucket.modelId] = {
used,
total,
resetAt: parseResetTime(bucket.resetTime),
remainingPercentage,
unlimited: false,
};
}
}
return { plan, quotas };
} catch (error) {
return { message: "Unable to fetch Gemini usage. Check Google Cloud Console." };
return { message: `Gemini CLI error: ${(error as Error).message}` };
}
}
/**
 * Return the Gemini CLI subscription info for an access token, reusing a
 * cached result while it is younger than GEMINI_CLI_CACHE_TTL_MS.
 *
 * Whatever getGeminiCliSubscriptionInfo resolves to is cached as-is, which
 * includes the `null` it returns for a failed lookup.
 *
 * @param accessToken OAuth access token; used verbatim as the cache key.
 * @returns The cached or freshly fetched subscription info (may be null).
 */
async function getGeminiCliSubscriptionInfoCached(accessToken) {
  const now = Date.now();
  const cached = _geminiCliSubCache.get(accessToken);
  if (cached && now - cached.fetchedAt < GEMINI_CLI_CACHE_TTL_MS) {
    return cached.data;
  }

  // The cache is keyed by the token itself, and tokens get replaced over time.
  // Drop every expired entry on a miss so a long-running process does not
  // accumulate one dead entry per token it has ever seen.
  for (const [key, entry] of _geminiCliSubCache) {
    if (now - entry.fetchedAt >= GEMINI_CLI_CACHE_TTL_MS) {
      _geminiCliSubCache.delete(key);
    }
  }

  const data = await getGeminiCliSubscriptionInfo(accessToken);
  _geminiCliSubCache.set(accessToken, { data, fetchedAt: Date.now() });
  return data;
}
/**
 * Fetch Gemini CLI subscription info from the Cloud Code Assist
 * `loadCodeAssist` endpoint.
 *
 * This is a best-effort lookup: a non-OK response, a network error, a timeout
 * or an unparseable body all yield `null` instead of throwing.
 *
 * @param accessToken OAuth access token sent as a Bearer credential.
 * @returns The parsed JSON response, or null when the lookup fails.
 */
async function getGeminiCliSubscriptionInfo(accessToken) {
  try {
    const response = await fetch("https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        metadata: {
          ideType: "IDE_UNSPECIFIED",
          platform: "PLATFORM_UNSPECIFIED",
          pluginType: "GEMINI",
        },
      }),
      // Without a deadline a stalled connection would block the caller
      // indefinitely; the abort surfaces as a rejection and is mapped to null below.
      signal: AbortSignal.timeout(10000),
    });

    if (!response.ok) return null;
    return await response.json();
  } catch {
    return null;
  }
}
/**
 * Translate a Gemini CLI subscription payload into a short plan label.
 *
 * Resolution order:
 *   1. the id of the first default entry in `allowedTiers`, else `currentTier.id`;
 *   2. the first non-empty of `currentTier.name`, `currentTier.displayName`,
 *      `subscriptionType`, `tier`;
 *   3. "Free" when `currentTier.upgradeSubscriptionType` is set;
 *   4. the tier name itself, capitalised; otherwise "Free".
 *
 * A missing or empty payload is reported as "Free".
 */
function getGeminiCliPlanLabel(subscriptionInfo) {
  const isEmpty = !subscriptionInfo || Object.keys(subscriptionInfo).length === 0;
  if (isEmpty) return "Free";

  // Keyword rules are checked top to bottom; the first hit wins. The set of
  // keywords meaning "Free" differs between the id pass and the name pass.
  const classify = (text, freeKeywords) => {
    const rules = [
      ["Ultra", ["ULTRA"]],
      ["Pro", ["PRO"]],
      ["Enterprise", ["ENTERPRISE"]],
      ["Business", ["BUSINESS", "STANDARD"]],
      ["Free", freeKeywords],
    ];
    for (const [label, keywords] of rules) {
      if (keywords.some((keyword) => text.includes(keyword))) return label;
    }
    return null;
  };

  const defaultTier = Array.isArray(subscriptionInfo.allowedTiers)
    ? subscriptionInfo.allowedTiers.find((tier) => tier.isDefault && tier.id)
    : undefined;
  const tierId =
    (defaultTier ? defaultTier.id.trim().toUpperCase() : "") ||
    (subscriptionInfo.currentTier?.id || "").toUpperCase();

  const labelFromId = tierId ? classify(tierId, ["FREE", "INDIVIDUAL", "LEGACY"]) : null;
  if (labelFromId) return labelFromId;

  const tierName =
    subscriptionInfo.currentTier?.name ||
    subscriptionInfo.currentTier?.displayName ||
    subscriptionInfo.subscriptionType ||
    subscriptionInfo.tier ||
    "";

  const labelFromName = classify(tierName.toUpperCase(), ["INDIVIDUAL", "FREE"]);
  if (labelFromName) return labelFromName;

  if (subscriptionInfo.currentTier?.upgradeSubscriptionType) return "Free";

  return tierName ? tierName.charAt(0).toUpperCase() + tierName.slice(1).toLowerCase() : "Free";
}
// ── Antigravity subscription info cache ──────────────────────────────────────
// Prevents duplicate loadCodeAssist calls within the same quota cycle.
// Key: truncated accessToken → { data, fetchedAt }
+1 -6
View File
@@ -72,12 +72,7 @@ const DETERMINISTIC_STRATEGIES: Set<RoutingStrategyValue> = new Set(["priority",
/**
* Providers that support prompt caching
*/
const CACHING_PROVIDERS = new Set([
"claude",
"anthropic",
"zai",
"qwen", // Alibaba Qwen Coding Plan International
]);
const CACHING_PROVIDERS = new Set(["claude", "anthropic", "zai", "qwen", "deepseek"]);
/**
* Detect if the client is Claude Code or another caching-aware client
+2 -2
View File
@@ -1,12 +1,12 @@
{
"name": "omniroute",
"version": "3.3.4",
"version": "3.3.5",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "omniroute",
"version": "3.3.4",
"version": "3.3.5",
"hasInstallScript": true,
"license": "MIT",
"workspaces": [
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "omniroute",
"version": "3.3.4",
"version": "3.3.5",
"description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
"type": "module",
"bin": {
@@ -0,0 +1,136 @@
"use client";
import { useEffect, useState } from "react";
import { Card } from "@/shared/components";
import { useTranslations } from "next-intl";
/**
 * Tailwind classes and label for each diversity risk level.
 * Every class name is written out in full: Tailwind only generates classes it
 * can find as complete strings in the source, so names assembled at runtime
 * (for example via String.replace) may never make it into the stylesheet.
 */
const DIVERSITY_RISK_LEVELS = {
  high: {
    text: "text-red-500",
    gauge: "bg-red-500",
    badge: "border-red-500/20 bg-red-500/10",
    label: "High Vendor Lock-in Risk",
  },
  moderate: {
    text: "text-amber-500",
    gauge: "bg-amber-500",
    badge: "border-amber-500/20 bg-amber-500/10",
    label: "Moderate Distribution",
  },
  healthy: {
    text: "text-green-500",
    gauge: "bg-green-500",
    badge: "border-green-500/20 bg-green-500/10",
    label: "Healthy Distribution",
  },
};

/**
 * Card showing the provider diversity score.
 *
 * Fetches `/api/analytics/diversity` once on mount and reads `score`,
 * `providers` (name -> `{ share }`), `windowSize` and `ttlMs` from the response.
 * `score` and `share` are multiplied by 100 for display, so they are assumed to
 * be fractions in the 0..1 range — TODO confirm against the API route.
 *
 * Renders a spinner while loading, an error message if the request fails, and
 * otherwise the score, a donut gauge and the four providers with the largest share.
 */
export default function DiversityScoreCard() {
  const [data, setData] = useState<any>(null);
  const [loading, setLoading] = useState(true);
  const [failed, setFailed] = useState(false);
  // NOTE(review): `t` is not used yet; the labels below are hard-coded English.
  const t = useTranslations("analytics");

  useEffect(() => {
    const controller = new AbortController();

    fetch("/api/analytics/diversity", { signal: controller.signal })
      .then((res) => {
        // An error payload must not be rendered as if it were a report.
        if (!res.ok) throw new Error(`Diversity request failed (${res.status})`);
        return res.json();
      })
      .then((json) => {
        setData(json);
        setLoading(false);
      })
      .catch((err) => {
        // Aborted on unmount: nothing to report and no state left to update.
        if (controller.signal.aborted) return;
        console.error(err);
        setFailed(true);
        setLoading(false);
      });

    return () => controller.abort();
  }, []);

  if (loading) {
    return (
      <Card className="p-5 flex flex-col justify-center items-center h-full min-h-[200px]">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary"></div>
      </Card>
    );
  }

  // Loading has finished but there is nothing to show: say so instead of spinning forever.
  if (failed || !data) {
    return (
      <Card className="p-5 flex flex-col justify-center items-center h-full min-h-[200px]">
        <span className="text-sm text-[var(--text-muted,#aaaaaa)]">
          Unable to load diversity score.
        </span>
      </Card>
    );
  }

  const scorePercentage = Math.round((data.score || 0) * 100);

  let risk = DIVERSITY_RISK_LEVELS.healthy;
  if (scorePercentage < 40) {
    risk = DIVERSITY_RISK_LEVELS.high;
  } else if (scorePercentage < 70) {
    risk = DIVERSITY_RISK_LEVELS.moderate;
  }

  const topProviders = Object.entries(data.providers || {})
    .sort(([, a]: any, [, b]: any) => b.share - a.share)
    .slice(0, 4); // Top 4 providers

  return (
    <Card className="p-5 flex flex-col h-full bg-[var(--card-bg,#1e1e2e)] relative overflow-hidden group">
      <div className="flex items-center gap-2 mb-4">
        <span className="material-symbols-outlined text-[20px] text-cyan-400">pie_chart</span>
        <h3 className="font-semibold text-[var(--text-primary,#fff)] flex-1">
          Provider Diversity Score
        </h3>
        <span className={`text-xs px-2 py-0.5 rounded-md border ${risk.badge} ${risk.text}`}>
          Shannon Entropy
        </span>
      </div>
      <div className="flex items-center justify-between mt-2 mb-6">
        <div className="flex flex-col">
          <span className={`text-4xl font-bold tabular-nums tracking-tight ${risk.text}`}>
            {scorePercentage}%
          </span>
          <span className="text-sm text-[var(--text-muted,#aaaaaa)] mt-1">{risk.label}</span>
        </div>
        {/* Simple CSS Donut */}
        <div className="relative w-20 h-20 flex-shrink-0">
          <svg className="w-full h-full transform -rotate-90" viewBox="0 0 36 36">
            <path
              className="text-[var(--border,#333)]"
              strokeWidth="4"
              stroke="currentColor"
              fill="none"
              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
            />
            <path
              className={risk.text}
              strokeWidth="4"
              strokeDasharray={`${scorePercentage}, 100`}
              stroke="currentColor"
              fill="none"
              strokeLinecap="round"
              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
            />
          </svg>
        </div>
      </div>
      <div className="space-y-4 flex-1">
        <p className="text-xs uppercase tracking-wider font-semibold text-[var(--text-muted,#888)]">
          Provider Share
        </p>
        {topProviders.length === 0 ? (
          <div className="text-sm text-[var(--text-secondary,#666)] py-2">
            No recent usage data available.
          </div>
        ) : (
          <div className="space-y-3">
            {topProviders.map(([provider, stat]: [string, any]) => (
              <div key={provider} className="flex flex-col gap-1.5">
                <div className="flex items-center justify-between text-sm">
                  <span className="font-medium text-[var(--text-primary,#ddd)] capitalize">
                    {provider}
                  </span>
                  <span className="font-mono text-[var(--text-muted,#aaa)]">
                    {Math.round(stat.share * 100)}%
                  </span>
                </div>
                <div className="w-full h-1.5 bg-[var(--surface,#333)] rounded-full overflow-hidden">
                  <div
                    className={`h-full ${risk.gauge} rounded-full`}
                    style={{ width: `${Math.round(stat.share * 100)}%` }}
                  />
                </div>
              </div>
            ))}
          </div>
        )}
      </div>
      <div className="mt-4 pt-4 border-t border-[var(--border,#333)] flex justify-between text-[11px] text-[var(--text-muted,#777)]">
        <span>Window: {data.windowSize} reqs</span>
        <span>Based on Last {Math.round(data.ttlMs / 60000)} mins</span>
      </div>
    </Card>
  );
}
@@ -4,6 +4,7 @@ import { useState, Suspense } from "react";
import { UsageAnalytics, CardSkeleton, SegmentedControl } from "@/shared/components";
import EvalsTab from "../usage/components/EvalsTab";
import SearchAnalyticsTab from "./SearchAnalyticsTab";
import DiversityScoreCard from "./components/DiversityScoreCard";
import { useTranslations } from "next-intl";
export default function AnalyticsPage() {
@@ -38,9 +39,14 @@ export default function AnalyticsPage() {
/>
{activeTab === "overview" && (
<Suspense fallback={<CardSkeleton />}>
<UsageAnalytics />
</Suspense>
<div className="flex flex-col gap-6">
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
<DiversityScoreCard />
</div>
<Suspense fallback={<CardSkeleton />}>
<UsageAnalytics />
</Suspense>
</div>
)}
{activeTab === "evals" && <EvalsTab />}
{activeTab === "search" && <SearchAnalyticsTab />}
@@ -0,0 +1,174 @@
"use client";
import { useState, useEffect, useCallback } from "react";
import { Button } from "@/shared/components";
import { useTranslations } from "next-intl";
/**
 * One cache entry as listed by `/api/cache/entries` (the `entries` array of the response).
 * Field names are snake_case as received.
 */
interface CacheEntry {
// Used as the React row key.
id: string;
// Identifies the entry for deletion; only the first 12 characters are displayed.
signature: string;
model: string;
hit_count: number;
tokens_saved: number;
// Date strings — presumably ISO timestamps; confirm against the API route.
created_at: string;
expires_at: string;
}
/**
 * Paging state as returned in the `pagination` field of the `/api/cache/entries` response.
 * `page` and `limit` are sent back as query parameters on the next request.
 */
interface Pagination {
page: number;
limit: number;
total: number;
totalPages: number;
}
/**
 * Table view of cache entries with search, per-row delete and paging.
 *
 * Data comes from `GET /api/cache/entries` (query params `page`, `limit`,
 * optional `search`); rows are removed with
 * `DELETE /api/cache/entries?signature=...`.
 *
 * Failures of either request are logged to the console and leave the currently
 * displayed rows untouched; they never surface as unhandled promise rejections.
 */
export default function CacheEntriesTab() {
  const t = useTranslations("cache");
  const [entries, setEntries] = useState<CacheEntry[]>([]);
  const [pagination, setPagination] = useState<Pagination>({
    page: 1,
    limit: 20,
    total: 0,
    totalPages: 0,
  });
  const [loading, setLoading] = useState(true);
  const [search, setSearch] = useState("");
  const [deleting, setDeleting] = useState<string | null>(null);

  // Loads one page of entries. The callback is rebuilt whenever `search` changes,
  // so the effect below also reloads page 1 on every change of the search text.
  const fetchEntries = useCallback(
    async (page = 1) => {
      setLoading(true);
      try {
        const params = new URLSearchParams({ page: String(page), limit: String(pagination.limit) });
        if (search) params.set("search", search);
        const res = await fetch(`/api/cache/entries?${params}`);
        if (!res.ok) {
          console.error(`[CacheEntriesTab] Loading cache entries failed with status ${res.status}`);
          return;
        }
        const data = await res.json();
        // Guard against a malformed payload so `entries.map` and the
        // `pagination.*` reads below can never hit undefined.
        setEntries(Array.isArray(data?.entries) ? data.entries : []);
        if (data?.pagination) setPagination(data.pagination);
      } catch (error) {
        console.error("[CacheEntriesTab] Failed to load cache entries:", error);
      } finally {
        setLoading(false);
      }
    },
    [search, pagination.limit]
  );

  useEffect(() => {
    fetchEntries();
  }, [fetchEntries]);

  // Deletes one entry by signature and then refreshes the list.
  const handleDelete = async (signature: string) => {
    setDeleting(signature);
    try {
      const res = await fetch(`/api/cache/entries?signature=${encodeURIComponent(signature)}`, {
        method: "DELETE",
      });
      if (!res.ok) {
        console.error(`[CacheEntriesTab] Deleting cache entry failed with status ${res.status}`);
        return;
      }
      // Removing the only row of a later page would leave that page empty (and
      // hide the pager, since the empty state replaces the table), so step back.
      const pageIsNowEmpty = entries.length === 1 && pagination.page > 1;
      await fetchEntries(pageIsNowEmpty ? pagination.page - 1 : pagination.page);
    } catch (error) {
      console.error("[CacheEntriesTab] Failed to delete cache entry:", error);
    } finally {
      setDeleting(null);
    }
  };

  const formatDate = (dateStr: string) => {
    return new Date(dateStr).toLocaleString();
  };

  return (
    <div className="flex flex-col gap-4">
      <div className="flex items-center gap-3">
        <input
          type="text"
          placeholder={t("searchEntries")}
          value={search}
          onChange={(e) => setSearch(e.target.value)}
          onKeyDown={(e) => e.key === "Enter" && fetchEntries()}
          className="flex-1 px-3 py-2 text-sm rounded-lg border border-border bg-surface text-text-main placeholder:text-text-muted"
        />
        <Button variant="secondary" size="sm" onClick={() => fetchEntries()}>
          {t("search")}
        </Button>
      </div>
      {loading ? (
        <div className="text-sm text-text-muted">{t("loading")}</div>
      ) : entries.length === 0 ? (
        <div className="text-sm text-text-muted text-center py-8">{t("noEntries")}</div>
      ) : (
        <>
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="text-left text-xs text-text-muted border-b border-border/30">
                  <th className="pb-2 pr-4">{t("signature")}</th>
                  <th className="pb-2 pr-4">{t("model")}</th>
                  <th className="pb-2 pr-4">{t("hits")}</th>
                  <th className="pb-2 pr-4">{t("tokensSaved")}</th>
                  <th className="pb-2 pr-4">{t("created")}</th>
                  <th className="pb-2 pr-4">{t("expires")}</th>
                  <th className="pb-2">{t("actions")}</th>
                </tr>
              </thead>
              <tbody>
                {entries.map((entry) => (
                  <tr key={entry.id} className="border-b border-border/20">
                    <td className="py-2 pr-4 font-mono text-xs">
                      {entry.signature.slice(0, 12)}...
                    </td>
                    <td className="py-2 pr-4">{entry.model}</td>
                    <td className="py-2 pr-4 tabular-nums">{entry.hit_count}</td>
                    <td className="py-2 pr-4 tabular-nums text-green-500">
                      {entry.tokens_saved.toLocaleString()}
                    </td>
                    <td className="py-2 pr-4 text-xs text-text-muted">
                      {formatDate(entry.created_at)}
                    </td>
                    <td className="py-2 pr-4 text-xs text-text-muted">
                      {formatDate(entry.expires_at)}
                    </td>
                    <td className="py-2">
                      <button
                        onClick={() => handleDelete(entry.signature)}
                        disabled={deleting === entry.signature}
                        className="text-xs text-red-400 hover:text-red-300 disabled:opacity-50"
                      >
                        {deleting === entry.signature ? "..." : "🗑️"}
                      </button>
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
          {/* Pagination */}
          {pagination.totalPages > 1 && (
            <div className="flex items-center justify-center gap-2 pt-2">
              {/* NOTE(review): both pager buttons have no visible label here — confirm one is intended. */}
              <Button
                variant="secondary"
                size="sm"
                onClick={() => fetchEntries(pagination.page - 1)}
                disabled={pagination.page <= 1}
              >
              </Button>
              <span className="text-sm text-text-muted">
                {pagination.page} / {pagination.totalPages}
              </span>
              <Button
                variant="secondary"
                size="sm"
                onClick={() => fetchEntries(pagination.page + 1)}
                disabled={pagination.page >= pagination.totalPages}
              >
              </Button>
            </div>
          )}
        </>
      )}
    </div>
  );
}
+362 -139
View File
@@ -4,6 +4,7 @@ import { useState, useEffect, useCallback } from "react";
import { Card, Button, EmptyState } from "@/shared/components";
import { useNotificationStore } from "@/store/notificationStore";
import { useTranslations } from "next-intl";
import CacheEntriesTab from "./components/CacheEntriesTab";
// ─── Types ───────────────────────────────────────────────────────────────────
@@ -16,13 +17,44 @@ interface SemanticCacheStats {
tokensSaved: number;
}
/**
 * Prompt-cache counters for a single bucket. Used as the value type of both
 * `byProvider` and `byStrategy` in PromptCacheMetrics; the per-provider table
 * renders every field with toLocaleString().
 */
interface PromptCacheProviderStats {
requests: number;
inputTokens: number;
cachedTokens: number;
cacheCreationTokens: number;
}
/**
 * Aggregated prompt-cache metrics shown in the "prompt cache" card.
 */
interface PromptCacheMetrics {
// Denominator of the displayed prompt-cache hit rate.
totalRequests: number;
// Numerator of the displayed hit rate (requestsWithCacheControl / totalRequests * 100).
requestsWithCacheControl: number;
totalInputTokens: number;
totalCachedTokens: number;
totalCacheCreationTokens: number;
tokensSaved: number;
// NOTE(review): currency/unit is not visible in this file — confirm with the producer.
estimatedCostSaved: number;
// Keyed by provider name; each entry becomes one row of the per-provider table.
byProvider: Record<string, PromptCacheProviderStats>;
byStrategy: Record<string, PromptCacheProviderStats>;
lastUpdated: string;
}
/** Idempotency (request de-duplication) figures shown in the idempotency card. */
interface IdempotencyStats {
activeKeys: number;
// Milliseconds; the page divides by 1000 and displays the result with an "s" suffix.
windowMs: number;
}
/** One bar of the cache trend chart. */
interface CacheTrendPoint {
// Parsed with `new Date(...)` for the bar label and also used as the React key of the bar.
timestamp: string;
// Bar height is scaled by this value relative to the largest `requests` in the series.
requests: number;
// The cached segment of the bar is cachedRequests / requests of the bar height.
cachedRequests: number;
inputTokens: number;
cachedTokens: number;
cacheCreationTokens: number;
}
/**
 * Everything the cache page keeps in its `stats` state.
 * NOTE(review): presumably mirrors the JSON body of `GET /api/cache` — confirm against the fetch call.
 */
interface CacheStats {
semanticCache: SemanticCacheStats;
// May be null; the prompt-cache card is rendered only when this is truthy.
promptCache: PromptCacheMetrics | null;
// The trend card is rendered only when this array is non-empty.
trend: CacheTrendPoint[];
idempotency: IdempotencyStats;
}
@@ -107,6 +139,7 @@ export default function CachePage() {
const [stats, setStats] = useState<CacheStats | null>(null);
const [loading, setLoading] = useState(true);
const [clearing, setClearing] = useState(false);
const [activeTab, setActiveTab] = useState<"overview" | "entries">("overview");
const notify = useNotificationStore();
const fetchStats = useCallback(async () => {
@@ -136,27 +169,32 @@ export default function CachePage() {
const res = await fetch("/api/cache", { method: "DELETE" });
if (res.ok) {
const data = await res.json();
notify.add({
type: "success",
message: t("clearSuccess", { count: data.expiredRemoved ?? 0 }),
});
notify.success(t("clearSuccess", { count: data.expiredRemoved ?? 0 }));
await fetchStats();
} else {
notify.add({ type: "error", message: t("clearError") });
notify.error(t("clearError"));
}
} catch (error) {
console.error("[CachePage] Failed to clear cache:", error);
notify.add({ type: "error", message: t("clearError") });
notify.error(t("clearError"));
} finally {
setClearing(false);
}
};
const sc = stats?.semanticCache;
const pc = stats?.promptCache;
const trend = stats?.trend ?? [];
const idp = stats?.idempotency;
const hitRate = sc ? parseFloat(sc.hitRate) : 0;
const totalRequests = sc ? sc.hits + sc.misses : 0;
const promptCacheHitRate =
pc && pc.totalRequests > 0 ? (pc.requestsWithCacheControl / pc.totalRequests) * 100 : 0;
const providerEntries = pc ? Object.entries(pc.byProvider) : [];
const maxTrendRequests = Math.max(1, ...trend.map((p) => p.requests));
return (
<div className="flex flex-col gap-6">
{/* Header */}
@@ -190,149 +228,334 @@ export default function CachePage() {
</div>
</div>
{/* Loading skeleton */}
{loading && (
<div
className="grid grid-cols-2 md:grid-cols-4 gap-4"
aria-busy="true"
aria-label="Loading cache statistics"
{/* Tab navigation */}
<div className="flex gap-1 p-1 rounded-lg bg-black/5 dark:bg-white/5 w-fit">
<button
onClick={() => setActiveTab("overview")}
className={`px-4 py-2 rounded-md text-sm font-medium transition-all ${
activeTab === "overview"
? "bg-white dark:bg-white/10 text-text-main shadow-sm"
: "text-text-muted hover:text-text-main"
}`}
>
{Array.from({ length: 4 }).map((_, i) => (
<div key={i} className="h-24 rounded-xl bg-surface-raised animate-pulse" />
))}
</div>
)}
{t("overview")}
</button>
<button
onClick={() => setActiveTab("entries")}
className={`px-4 py-2 rounded-md text-sm font-medium transition-all ${
activeTab === "entries"
? "bg-white dark:bg-white/10 text-text-main shadow-sm"
: "text-text-muted hover:text-text-main"
}`}
>
{t("entries")}
</button>
</div>
{/* Error / empty state */}
{!loading && !stats && (
<EmptyState
icon="cached"
title={t("unavailable")}
description={t("unavailableDesc")}
actionLabel={t("refresh")}
onAction={() => void fetchStats()}
/>
)}
{/* Entries tab */}
{activeTab === "entries" && <CacheEntriesTab />}
{/* Main content */}
{!loading && stats && (
{/* Overview tab content */}
{activeTab === "overview" && (
<>
{/* Stats grid */}
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
<StatCard
icon="memory"
label={t("memoryEntries")}
value={sc?.memoryEntries ?? 0}
sub={t("memoryEntriesSub")}
/>
<StatCard
icon="storage"
label={t("dbEntries")}
value={sc?.dbEntries ?? 0}
sub={t("dbEntriesSub")}
/>
<StatCard
icon="trending_up"
label={t("cacheHits")}
value={sc?.hits ?? 0}
sub={t("cacheHitsSub", { total: totalRequests })}
valueClass="text-green-500"
/>
<StatCard
icon="token"
label={t("tokensSaved")}
value={(sc?.tokensSaved ?? 0).toLocaleString()}
sub={t("tokensSavedSub")}
valueClass="text-blue-400"
/>
</div>
{/* Hit rate + breakdown */}
<Card>
<div className="p-5 flex flex-col gap-4">
<div className="flex items-center justify-between">
<h2 className="font-medium text-sm">{t("performance")}</h2>
<span className="text-xs text-text-muted">
{t("autoRefresh", { seconds: REFRESH_INTERVAL_SECONDS })}
</span>
</div>
<HitRateBar hitRate={hitRate} label={t("hitRate")} />
<div className="grid grid-cols-3 gap-4 pt-3 border-t border-border/30 text-center">
<div>
<div className="text-lg font-semibold tabular-nums text-green-500">
{sc?.hits ?? 0}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("hits")}</div>
</div>
<div>
<div className="text-lg font-semibold tabular-nums text-red-400">
{sc?.misses ?? 0}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("misses")}</div>
</div>
<div>
<div className="text-lg font-semibold tabular-nums">{totalRequests}</div>
<div className="text-xs text-text-muted mt-0.5">{t("total")}</div>
</div>
</div>
{/* Loading skeleton */}
{loading && (
<div
className="grid grid-cols-2 md:grid-cols-4 gap-4"
aria-busy="true"
aria-label="Loading cache statistics"
>
{Array.from({ length: 4 }).map((_, i) => (
<div key={i} className="h-24 rounded-xl bg-surface-raised animate-pulse" />
))}
</div>
</Card>
)}
{/* Cache behavior */}
<Card>
<div className="p-5 flex flex-col gap-3">
<h2 className="font-medium text-sm">{t("behavior")}</h2>
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
<InfoRow icon="info">{t("behaviorDeterministic")}</InfoRow>
<InfoRow icon="info">
{t.rich("behaviorBypass", {
header: () => (
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
X-OmniRoute-No-Cache: true
</code>
),
})}
</InfoRow>
<InfoRow icon="info">{t("behaviorTwoTier")}</InfoRow>
<InfoRow icon="info">
{t.rich("behaviorTtl", {
envVar: () => (
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
SEMANTIC_CACHE_TTL_MS
</code>
),
})}
</InfoRow>
</div>
</div>
</Card>
{/* Error / empty state */}
{!loading && !stats && (
<EmptyState
icon="cached"
title={t("unavailable")}
description={t("unavailableDesc")}
actionLabel={t("refresh")}
onAction={() => void fetchStats()}
/>
)}
{/* Idempotency */}
<Card>
<div className="p-5 flex flex-col gap-3">
<div className="flex items-center gap-2">
<span
className="material-symbols-outlined text-base text-text-muted"
aria-hidden="true"
>
fingerprint
</span>
<h2 className="font-medium text-sm">{t("idempotency")}</h2>
{/* Main content */}
{!loading && stats && (
<>
{/* Stats grid */}
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
<StatCard
icon="memory"
label={t("memoryEntries")}
value={sc?.memoryEntries ?? 0}
sub={t("memoryEntriesSub")}
/>
<StatCard
icon="storage"
label={t("dbEntries")}
value={sc?.dbEntries ?? 0}
sub={t("dbEntriesSub")}
/>
<StatCard
icon="trending_up"
label={t("cacheHits")}
value={sc?.hits ?? 0}
sub={t("cacheHitsSub", { total: totalRequests })}
valueClass="text-green-500"
/>
<StatCard
icon="token"
label={t("tokensSaved")}
value={(sc?.tokensSaved ?? 0).toLocaleString()}
sub={t("tokensSavedSub")}
valueClass="text-blue-400"
/>
</div>
<div className="grid grid-cols-2 gap-4">
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums">{idp?.activeKeys ?? 0}</div>
<div className="text-xs text-text-muted mt-0.5">{t("activeDedupKeys")}</div>
</div>
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums">
{idp ? `${(idp.windowMs / 1000).toFixed(0)}s` : "—"}
{/* Hit rate + breakdown */}
<Card>
<div className="p-5 flex flex-col gap-4">
<div className="flex items-center justify-between">
<h2 className="font-medium text-sm">{t("performance")}</h2>
<span className="text-xs text-text-muted">
{t("autoRefresh", { seconds: REFRESH_INTERVAL_SECONDS })}
</span>
</div>
<HitRateBar hitRate={hitRate} label={t("hitRate")} />
<div className="grid grid-cols-3 gap-4 pt-3 border-t border-border/30 text-center">
<div>
<div className="text-lg font-semibold tabular-nums text-green-500">
{sc?.hits ?? 0}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("hits")}</div>
</div>
<div>
<div className="text-lg font-semibold tabular-nums text-red-400">
{sc?.misses ?? 0}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("misses")}</div>
</div>
<div>
<div className="text-lg font-semibold tabular-nums">{totalRequests}</div>
<div className="text-xs text-text-muted mt-0.5">{t("total")}</div>
</div>
</div>
<div className="text-xs text-text-muted mt-0.5">{t("dedupWindow")}</div>
</div>
</div>
</div>
</Card>
</Card>
{/* Prompt Cache Stats */}
{pc && (
<Card>
<div className="p-5 flex flex-col gap-4">
<div className="flex items-center gap-2">
<span
className="material-symbols-outlined text-base text-text-muted"
aria-hidden="true"
>
bolt
</span>
<h2 className="font-medium text-sm">{t("promptCache")}</h2>
</div>
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums">
{pc.requestsWithCacheControl.toLocaleString()}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("cachedRequests")}</div>
</div>
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums text-green-500">
{promptCacheHitRate.toFixed(1)}%
</div>
<div className="text-xs text-text-muted mt-0.5">{t("cacheHitRate")}</div>
</div>
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums text-blue-400">
{pc.totalCachedTokens.toLocaleString()}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("cachedTokens")}</div>
</div>
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums text-purple-400">
{pc.totalCacheCreationTokens.toLocaleString()}
</div>
<div className="text-xs text-text-muted mt-0.5">
{t("cacheCreationTokens")}
</div>
</div>
</div>
{providerEntries.length > 0 && (
<div className="pt-3 border-t border-border/30">
<h3 className="text-xs font-medium text-text-muted mb-3">
{t("byProvider")}
</h3>
<div className="overflow-x-auto">
<table className="w-full text-sm">
<thead>
<tr className="text-left text-xs text-text-muted border-b border-border/30">
<th className="pb-2 pr-4">{t("provider")}</th>
<th className="pb-2 pr-4">{t("requests")}</th>
<th className="pb-2 pr-4">{t("inputTokens")}</th>
<th className="pb-2 pr-4">{t("cachedTokensCol")}</th>
<th className="pb-2">{t("cacheCreation")}</th>
</tr>
</thead>
<tbody>
{providerEntries.map(([provider, data]) => (
<tr key={provider} className="border-b border-border/20">
<td className="py-2 pr-4 font-medium">{provider}</td>
<td className="py-2 pr-4 tabular-nums">
{data.requests.toLocaleString()}
</td>
<td className="py-2 pr-4 tabular-nums">
{data.inputTokens.toLocaleString()}
</td>
<td className="py-2 pr-4 tabular-nums text-green-500">
{data.cachedTokens.toLocaleString()}
</td>
<td className="py-2 tabular-nums text-purple-400">
{data.cacheCreationTokens.toLocaleString()}
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
</div>
</Card>
)}
{/* Cache Trend (24h) */}
{trend.length > 0 && (
<Card>
<div className="p-5 flex flex-col gap-4">
<div className="flex items-center gap-2">
<span
className="material-symbols-outlined text-base text-text-muted"
aria-hidden="true"
>
timeline
</span>
<h2 className="font-medium text-sm">{t("trend24h")}</h2>
</div>
<div className="flex items-end gap-1 h-32">
{trend.map((point) => {
const height = Math.max(4, (point.requests / maxTrendRequests) * 100);
const cachedHeight =
point.requests > 0
? Math.max(2, (point.cachedRequests / point.requests) * height)
: 0;
const hour = new Date(point.timestamp).toLocaleTimeString([], {
hour: "2-digit",
minute: "2-digit",
hour12: false,
});
return (
<div
key={point.timestamp}
className="flex-1 flex flex-col items-center gap-1 group relative"
>
<div className="absolute bottom-full mb-1 hidden group-hover:block bg-surface-raised border border-border rounded px-2 py-1 text-xs whitespace-nowrap z-10">
{hour}: {point.requests} {t("requests").toLowerCase()},{" "}
{point.cachedRequests} {t("cached").toLowerCase()}
</div>
<div className="w-full flex flex-col justify-end h-full gap-px">
<div
className="w-full bg-green-500/30 rounded-t"
style={{ height: `${cachedHeight}%` }}
/>
<div
className="w-full bg-text-muted/20 rounded-t"
style={{ height: `${height - cachedHeight}%` }}
/>
</div>
<span className="text-[10px] text-text-muted truncate w-full text-center">
{hour.split(":")[0]}
</span>
</div>
);
})}
</div>
<div className="flex items-center gap-4 text-xs text-text-muted">
<div className="flex items-center gap-1.5">
<div className="w-3 h-3 rounded bg-text-muted/20" />
<span>{t("total")}</span>
</div>
<div className="flex items-center gap-1.5">
<div className="w-3 h-3 rounded bg-green-500/30" />
<span>{t("cached")}</span>
</div>
</div>
</div>
</Card>
)}
{/* Cache behavior */}
<Card>
<div className="p-5 flex flex-col gap-3">
<h2 className="font-medium text-sm">{t("behavior")}</h2>
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
<InfoRow icon="info">{t("behaviorDeterministic")}</InfoRow>
<InfoRow icon="info">
{t.rich("behaviorBypass", {
header: () => (
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
X-OmniRoute-No-Cache: true
</code>
),
})}
</InfoRow>
<InfoRow icon="info">{t("behaviorTwoTier")}</InfoRow>
<InfoRow icon="info">
{t.rich("behaviorTtl", {
envVar: () => (
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
SEMANTIC_CACHE_TTL_MS
</code>
),
})}
</InfoRow>
</div>
</div>
</Card>
{/* Idempotency */}
<Card>
<div className="p-5 flex flex-col gap-3">
<div className="flex items-center gap-2">
<span
className="material-symbols-outlined text-base text-text-muted"
aria-hidden="true"
>
fingerprint
</span>
<h2 className="font-medium text-sm">{t("idempotency")}</h2>
</div>
<div className="grid grid-cols-2 gap-4">
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums">
{idp?.activeKeys ?? 0}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("activeDedupKeys")}</div>
</div>
<div className="p-3 rounded-lg bg-surface/50">
<div className="text-lg font-semibold tabular-nums">
{idp ? `${(idp.windowMs / 1000).toFixed(0)}s` : "—"}
</div>
<div className="text-xs text-text-muted mt-0.5">{t("dedupWindow")}</div>
</div>
</div>
</div>
</Card>
</>
)}
</>
)}
</div>
@@ -802,8 +802,6 @@ export default function ProviderDetailPage() {
const { copied, copy } = useCopyToClipboard();
const t = useTranslations("providers");
const notify = useNotificationStore();
const hasAutoOpened = useRef(false);
const userDismissed = useRef(false);
const [proxyTarget, setProxyTarget] = useState(null);
const [proxyConfig, setProxyConfig] = useState(null);
const [connProxyMap, setConnProxyMap] = useState<
@@ -989,25 +987,6 @@ export default function ProviderDetailPage() {
}
}, [loading, connections, loadConnProxies]);
// Auto-open Add Connection modal when no connections exist (better UX)
// Only fires once on initial load, not on HMR remounts or after user dismissal
useEffect(() => {
if (
!loading &&
connections.length === 0 &&
providerInfo &&
!isCompatible &&
!hasAutoOpened.current &&
!userDismissed.current
) {
hasAutoOpened.current = true;
if (isOAuth) {
setShowOAuthModal(true);
} else {
setShowAddApiKeyModal(true);
}
}
}, [loading]); // eslint-disable-line react-hooks/exhaustive-deps
const handleSetAlias = async (modelId, alias, providerAliasOverride = providerAlias) => {
const fullModel = `${providerAliasOverride}/${modelId}`;
@@ -2428,7 +2407,6 @@ export default function ProviderDetailPage() {
providerInfo={providerInfo}
onSuccess={handleOAuthSuccess}
onClose={() => {
userDismissed.current = true;
setShowOAuthModal(false);
}}
/>
@@ -2437,7 +2415,6 @@ export default function ProviderDetailPage() {
isOpen={showOAuthModal}
onSuccess={handleOAuthSuccess}
onClose={() => {
userDismissed.current = true;
setShowOAuthModal(false);
}}
/>
@@ -2448,7 +2425,6 @@ export default function ProviderDetailPage() {
providerInfo={providerInfo}
onSuccess={handleOAuthSuccess}
onClose={() => {
userDismissed.current = true;
setShowOAuthModal(false);
}}
/>
@@ -0,0 +1,191 @@
"use client";
import { useState, useEffect } from "react";
import { Card, Button } from "@/shared/components";
import { useTranslations } from "next-intl";
/**
 * Cache settings edited by this tab. The object is loaded from
 * `GET /api/settings/cache-config` and sent back unchanged in shape as the JSON
 * body of `PUT /api/settings/cache-config`.
 */
interface CacheConfig {
semanticCacheEnabled: boolean;
// Edited through a number input declared with min 1 / max 1000.
semanticCacheMaxSize: number;
// Stored in milliseconds; the form shows and edits it in minutes (value / 60000).
semanticCacheTTL: number;
promptCacheEnabled: boolean;
promptCacheStrategy: "auto" | "system-only" | "manual";
alwaysPreserveClientCache: "auto" | "always" | "never";
}
/**
 * Parse the text of a number input.
 * Returns `fallback` when the text is not a number, otherwise the parsed
 * integer clamped to the inclusive range [min, max].
 */
function parseBoundedInt(raw: string, fallback: number, min: number, max: number): number {
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) return fallback;
  return Math.min(max, Math.max(min, parsed));
}

/**
 * Settings card for the semantic cache and the prompt cache.
 *
 * Loads the current configuration from `GET /api/settings/cache-config` on
 * mount and writes the edited configuration back with `PUT` when "save" is
 * pressed. Load and save failures are logged to the console; the form keeps
 * its current values in either case.
 */
export default function CacheSettingsTab() {
  const t = useTranslations("settings");
  const [config, setConfig] = useState<CacheConfig>({
    semanticCacheEnabled: true,
    semanticCacheMaxSize: 100,
    semanticCacheTTL: 1800000,
    promptCacheEnabled: true,
    promptCacheStrategy: "auto",
    alwaysPreserveClientCache: "auto",
  });
  const [saving, setSaving] = useState(false);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    fetch("/api/settings/cache-config")
      .then((r) => (r.ok ? r.json() : null))
      .then((data) => {
        // Merge over the defaults instead of replacing them, so a partial
        // payload cannot leave fields undefined (which would render NaN/blank).
        if (data && typeof data === "object") setConfig((c) => ({ ...c, ...data }));
      })
      .catch((error) => {
        console.error("[CacheSettingsTab] Failed to load cache config:", error);
      })
      .finally(() => setLoading(false));
  }, []);

  const handleSave = async () => {
    setSaving(true);
    try {
      const res = await fetch("/api/settings/cache-config", {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(config),
      });
      if (!res.ok) {
        console.error(`[CacheSettingsTab] Saving cache config failed with status ${res.status}`);
      }
    } catch (error) {
      // Without this catch a network failure would escape the click handler
      // as an unhandled promise rejection.
      console.error("[CacheSettingsTab] Failed to save cache config:", error);
    } finally {
      setSaving(false);
    }
  };

  if (loading) {
    return (
      <Card className="p-6">
        <p className="text-sm text-text-muted">{t("loading")}</p>
      </Card>
    );
  }

  return (
    <Card className="p-6">
      <h3 className="text-lg font-semibold text-text-main flex items-center gap-2 mb-4">
        <span className="material-symbols-outlined text-[20px]">cached</span>
        {t("cacheSettings")}
      </h3>
      <div className="space-y-6">
        {/* Semantic Cache */}
        <div className="space-y-3">
          <h4 className="text-sm font-medium text-text-main">{t("semanticCache")}</h4>
          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("enabled")}</span>
            <button
              type="button"
              role="switch"
              aria-checked={config.semanticCacheEnabled}
              onClick={() =>
                setConfig((c) => ({ ...c, semanticCacheEnabled: !c.semanticCacheEnabled }))
              }
              className={`relative w-10 h-5 rounded-full transition-colors ${
                config.semanticCacheEnabled ? "bg-green-500" : "bg-border"
              }`}
            >
              <span
                className={`absolute top-0.5 w-4 h-4 rounded-full bg-white transition-transform ${
                  config.semanticCacheEnabled ? "left-5" : "left-0.5"
                }`}
              />
            </button>
          </label>
          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("maxEntries")}</span>
            <input
              type="number"
              min={1}
              max={1000}
              value={config.semanticCacheMaxSize}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  // min/max attributes do not constrain typed input, so clamp here.
                  semanticCacheMaxSize: parseBoundedInt(e.target.value, 100, 1, 1000),
                }))
              }
              className="w-24 px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            />
          </label>
          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("ttlMinutes")}</span>
            <input
              type="number"
              min={1}
              max={1440}
              value={Math.round(config.semanticCacheTTL / 60000)}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  // The field is edited in minutes but stored in milliseconds.
                  semanticCacheTTL: parseBoundedInt(e.target.value, 30, 1, 1440) * 60000,
                }))
              }
              className="w-24 px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            />
          </label>
        </div>
        {/* Prompt Cache */}
        <div className="space-y-3 pt-4 border-t border-border/30">
          <h4 className="text-sm font-medium text-text-main">{t("promptCache")}</h4>
          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("enabled")}</span>
            <button
              type="button"
              role="switch"
              aria-checked={config.promptCacheEnabled}
              onClick={() =>
                setConfig((c) => ({ ...c, promptCacheEnabled: !c.promptCacheEnabled }))
              }
              className={`relative w-10 h-5 rounded-full transition-colors ${
                config.promptCacheEnabled ? "bg-green-500" : "bg-border"
              }`}
            >
              <span
                className={`absolute top-0.5 w-4 h-4 rounded-full bg-white transition-transform ${
                  config.promptCacheEnabled ? "left-5" : "left-0.5"
                }`}
              />
            </button>
          </label>
          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("strategy")}</span>
            <select
              value={config.promptCacheStrategy}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  promptCacheStrategy: e.target.value as CacheConfig["promptCacheStrategy"],
                }))
              }
              className="px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            >
              <option value="auto">Auto</option>
              <option value="system-only">System Only</option>
              <option value="manual">Manual</option>
            </select>
          </label>
          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("preserveClientCache")}</span>
            <select
              value={config.alwaysPreserveClientCache}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  alwaysPreserveClientCache: e.target
                    .value as CacheConfig["alwaysPreserveClientCache"],
                }))
              }
              className="px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            >
              <option value="auto">Auto</option>
              <option value="always">Always</option>
              <option value="never">Never</option>
            </select>
          </label>
        </div>
        {/* Save */}
        <div className="pt-4 border-t border-border/30">
          <Button onClick={handleSave} disabled={saving} size="sm">
            {saving ? t("saving") : t("save")}
          </Button>
        </div>
      </div>
    </Card>
  );
}
@@ -18,6 +18,7 @@ import ModelAliasesTab from "./components/ModelAliasesTab";
import BackgroundDegradationTab from "./components/BackgroundDegradationTab";
import CacheStatsCard from "./components/CacheStatsCard";
import CacheSettingsTab from "./components/CacheSettingsTab";
import ResilienceTab from "./components/ResilienceTab";
const tabs = [
@@ -89,6 +90,7 @@ export default function SettingsPage() {
<CodexServiceTierTab />
<SystemPromptTab />
<CacheStatsCard />
<CacheSettingsTab />
</div>
)}
@@ -28,6 +28,7 @@ const QUOTA_BAR_YELLOW_THRESHOLD = 20;
// Provider display config
const PROVIDER_CONFIG = {
antigravity: { label: "Antigravity", color: "#F59E0B" },
"gemini-cli": { label: "Gemini CLI", color: "#4285F4" },
github: { label: "GitHub Copilot", color: "#333" },
kiro: { label: "Kiro AI", color: "#FF6B35" },
codex: { label: "OpenAI Codex", color: "#10A37F" },
@@ -279,12 +280,13 @@ export default function ProviderLimits() {
const sortedConnections = useMemo(() => {
const priority = {
antigravity: 1,
github: 2,
codex: 3,
claude: 4,
kiro: 5,
glm: 6,
"kimi-coding": 7,
"gemini-cli": 2,
github: 3,
codex: 4,
claude: 5,
kiro: 6,
glm: 7,
"kimi-coding": 8,
};
return [...filteredConnections].sort(
(a, b) => (priority[a.provider] || 9) - (priority[b.provider] || 9)
@@ -624,6 +626,7 @@ export default function ProviderLimits() {
>
{/* Model label */}
<span
title={q.modelKey || q.name}
className="text-[11px] font-semibold py-0.5 px-2 rounded whitespace-nowrap min-w-[60px] text-center"
style={{ background: colors.bg, color: colors.text }}
>
@@ -11,15 +11,6 @@ const PROVIDER_PLAN_FALLBACKS = new Set([
]);
const QUOTA_LABEL_MAP: Record<string, string> = {
"gemini-3-pro-high": "G3 Pro",
"gemini-3-pro-low": "G3 Pro Low",
"gemini-3-flash": "G3 Flash",
"gemini-2.5-flash": "G2.5 Flash",
"claude-opus-4-6-thinking": "Opus 4.6 Tk",
"claude-opus-4-5-thinking": "Opus 4.5 Tk",
"claude-opus-4-5": "Opus 4.5",
"claude-sonnet-4-5-thinking": "Sonnet 4.5 Tk",
"claude-sonnet-4-5": "Sonnet 4.5",
chat: "Chat",
completions: "Completions",
premium_interactions: "Premium",
@@ -254,6 +245,14 @@ export function parseQuotaData(provider, data) {
}
break;
case "gemini-cli":
if (data.quotas) {
Object.entries(data.quotas).forEach(([modelKey, quota]: [string, any]) => {
normalizedQuotas.push(normalizeQuotaEntry(modelKey, quota, { modelKey }));
});
}
break;
default:
// Generic fallback for unknown providers
if (data.quotas) {
+13
View File
@@ -0,0 +1,13 @@
import { NextResponse } from "next/server";
import { getDiversityReport } from "../../../../../open-sse/services/autoCombo/providerDiversity";
export const dynamic = "force-dynamic";
export async function GET() {
try {
const report = getDiversityReport();
return NextResponse.json(report);
} catch (error: any) {
return NextResponse.json({ error: error.message }, { status: 500 });
}
}
+95
View File
@@ -0,0 +1,95 @@
import { NextRequest, NextResponse } from "next/server";
import { getDbInstance } from "@/lib/db/core";
/**
 * Row shape returned by the GET handler in this file: exactly the columns it
 * selects from the `semantic_cache` table.
 */
interface CacheEntry {
id: string;
// Matched by `LIKE` when a `search` parameter is given; also the key used by DELETE ?signature=.
signature: string;
// Matched by `LIKE` for `search` and by equality for the `model` parameter.
model: string;
hit_count: number;
tokens_saved: number;
created_at: string;
expires_at: string;
}
/**
 * GET handler: paginated listing of rows from the `semantic_cache` table.
 *
 * Query parameters (all optional):
 *   page      - 1-based page number, default 1; values below 1 become 1
 *   limit     - page size, default 20, clamped to 1..100
 *   search    - substring matched literally against `signature` or `model`
 *   model     - exact model filter
 *   sortBy    - one of created_at | expires_at | hit_count | tokens_saved | model
 *               (anything else falls back to created_at)
 *   sortOrder - "asc" for ascending, anything else is descending
 *
 * Responds with `{ entries, pagination: { page, limit, total, totalPages } }`,
 * or with status 500 and `{ error }` if the query fails.
 */
export async function GET(req: NextRequest) {
  try {
    const { searchParams } = new URL(req.url);

    // Non-numeric input ("abc") must fall back to the default: Math.max(1, NaN)
    // is NaN, which would otherwise reach LIMIT/OFFSET and the response body.
    const intParam = (name: string, fallback: number): number => {
      const parsed = Number.parseInt(searchParams.get(name) ?? "", 10);
      return Number.isNaN(parsed) ? fallback : parsed;
    };

    const page = Math.max(1, intParam("page", 1));
    const limit = Math.min(100, Math.max(1, intParam("limit", 20)));
    const search = searchParams.get("search") || "";
    const model = searchParams.get("model") || "";
    const sortBy = searchParams.get("sortBy") || "created_at";
    const sortOrder = searchParams.get("sortOrder") || "desc";
    const db = getDbInstance();
    const offset = (page - 1) * limit;

    const conditions: string[] = [];
    const params: unknown[] = [];
    if (search) {
      // Escape LIKE metacharacters so "%" and "_" typed by the user are matched
      // literally rather than acting as wildcards.
      const pattern = `%${search.replace(/[\\%_]/g, "\\$&")}%`;
      conditions.push("(signature LIKE ? ESCAPE '\\' OR model LIKE ? ESCAPE '\\')");
      params.push(pattern, pattern);
    }
    if (model) {
      conditions.push("model = ?");
      params.push(model);
    }
    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";

    // Column name and direction are interpolated into the SQL text, so both are
    // restricted to fixed allow-listed values; user input never reaches the query string.
    const validSortColumns = ["created_at", "expires_at", "hit_count", "tokens_saved", "model"];
    const orderBy = validSortColumns.includes(sortBy) ? sortBy : "created_at";
    const order = sortOrder === "asc" ? "ASC" : "DESC";

    const countRow = db
      .prepare(`SELECT COUNT(*) as total FROM semantic_cache ${whereClause}`)
      .get(...params) as { total: number };
    const entries = db
      .prepare(
        `SELECT id, signature, model, hit_count, tokens_saved, created_at, expires_at
         FROM semantic_cache ${whereClause}
         ORDER BY ${orderBy} ${order}
         LIMIT ? OFFSET ?`
      )
      .all(...params, limit, offset) as CacheEntry[];

    const total = countRow?.total || 0;
    return NextResponse.json({
      entries,
      pagination: {
        page,
        limit,
        total,
        totalPages: Math.ceil(total / limit),
      },
    });
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
/**
 * DELETE handler: removes rows from the `semantic_cache` table.
 *
 * Query parameters (the first one present wins):
 *   signature - delete the row(s) with exactly this signature
 *   model     - delete every row for this model
 *
 * Responds with `{ ok: true, deleted }` where `deleted` is the number of rows
 * the statement actually removed (0 when nothing matched), with status 400
 * when neither parameter is given, or with status 500 and `{ error }` on failure.
 *
 * NOTE(review): only the database table is touched here; if the cache also has
 * an in-memory tier, confirm whether it needs to be invalidated as well.
 */
export async function DELETE(req: NextRequest) {
  try {
    const { searchParams } = new URL(req.url);
    const signature = searchParams.get("signature");
    const model = searchParams.get("model");
    const db = getDbInstance();

    if (signature) {
      // Report the real row count instead of a hard-coded 1, so callers can
      // tell "deleted" apart from "no such entry".
      const result = db.prepare("DELETE FROM semantic_cache WHERE signature = ?").run(signature);
      return NextResponse.json({ ok: true, deleted: result.changes });
    }
    if (model) {
      const result = db.prepare("DELETE FROM semantic_cache WHERE model = ?").run(model);
      return NextResponse.json({ ok: true, deleted: result.changes });
    }
    return NextResponse.json({ error: "Provide signature or model parameter" }, { status: 400 });
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
+9 -15
View File
@@ -8,21 +8,26 @@ import {
invalidateStale,
} from "@/lib/semanticCache";
import { getIdempotencyStats } from "@/lib/idempotencyLayer";
import { getCacheMetrics, getCacheTrend } from "@/lib/db/settings";
/**
 * Convert any caught value into a printable message.
 * Error instances contribute their `message`; every other value is passed
 * through `String(...)`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
* GET /api/cache — returns cache statistics
*/
export async function GET() {
export async function GET(req: NextRequest) {
try {
const { searchParams } = new URL(req.url);
const trendHours = parseInt(searchParams.get("trendHours") || "24", 10);
const cacheStats = getCacheStats();
const idempotencyStats = getIdempotencyStats();
const promptCacheMetrics = await getCacheMetrics();
const trend = await getCacheTrend(trendHours);
return NextResponse.json({
semanticCache: cacheStats,
promptCache: promptCacheMetrics,
trend,
idempotency: idempotencyStats,
});
} catch (error) {
@@ -30,17 +35,6 @@ export async function GET() {
}
}
/**
* DELETE /api/cache Clear all caches or targeted invalidation.
*
* Exactly one optional query parameter may be provided:
* ?model=<name> invalidate all entries for a specific model
* ?signature=<hex> invalidate a single entry by its SHA-256 signature
* ?staleMs=<number> invalidate entries older than N milliseconds
* (no params) clear all cache entries
*
* Providing more than one parameter returns 400 Bad Request.
*/
export async function DELETE(req: NextRequest) {
try {
const { searchParams } = new URL(req.url);
+68 -13
View File
@@ -139,19 +139,7 @@ const PROVIDER_MODELS_CONFIG: Record<string, ProviderModelsConfigEntry> = {
name: m.displayName || (m.name || "").replace(/^models\//, ""),
})),
},
"gemini-cli": {
url: "https://generativelanguage.googleapis.com/v1beta/models",
method: "GET",
headers: { "Content-Type": "application/json" },
authHeader: "Authorization",
authPrefix: "Bearer ",
parseResponse: (data) =>
(data.models || []).map((m) => ({
...m,
id: (m.name || m.id || "").replace(/^models\//, ""),
name: m.displayName || (m.name || "").replace(/^models\//, ""),
})),
},
// gemini-cli handled via retrieveUserQuota (see GET handler)
qwen: {
url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models",
method: "GET",
@@ -505,6 +493,73 @@ export async function GET(
return buildResponse({ provider, connectionId, models });
}
if (provider === "gemini-cli") {
  // Gemini CLI doesn't have a /models endpoint. Instead, query the quota
  // endpoint and derive the available models from the quota buckets it returns.
  if (!accessToken) {
    return NextResponse.json(
      { error: "No access token for Gemini CLI. Please reconnect OAuth." },
      { status: 400 }
    );
  }

  // The project ID may be stored on the connection itself or inside its
  // provider-specific data; the quota request cannot be made without it.
  const psd = asRecord(connection.providerSpecificData);
  const projectId = connection.projectId || psd.projectId || null;
  if (!projectId) {
    return NextResponse.json(
      { error: "Gemini CLI project ID not available. Please reconnect OAuth." },
      { status: 400 }
    );
  }

  try {
    const quotaRes = await fetch(
      "https://cloudcode-pa.googleapis.com/v1internal:retrieveUserQuota",
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${accessToken}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ project: projectId }),
        // Abort the upstream call after 10 seconds instead of hanging the request.
        signal: AbortSignal.timeout(10000),
      }
    );

    if (!quotaRes.ok) {
      const errText = await quotaRes.text();
      console.log(`[models] Gemini CLI quota fetch failed (${quotaRes.status}):`, errText);
      // Mirror the upstream status so the caller can tell auth failures from outages.
      return NextResponse.json(
        { error: `Failed to fetch Gemini CLI models: ${quotaRes.status}` },
        { status: quotaRes.status }
      );
    }

    const quotaData = await quotaRes.json();
    const buckets: Array<{ modelId?: string; tokenType?: string }> = quotaData.buckets || [];

    // Buckets carry a tokenType next to the modelId, so the same model can show
    // up in more than one bucket. List each model once, in first-seen order.
    const seenModelIds = new Set<string>();
    const models: Array<{ id: string; name: string; owned_by: string }> = [];
    for (const { modelId } of buckets) {
      if (!modelId || seenModelIds.has(modelId)) continue;
      seenModelIds.add(modelId);
      models.push({ id: modelId, name: modelId, owned_by: "google" });
    }

    return buildResponse({ provider, connectionId, models });
  } catch (err: unknown) {
    // Network failures, timeouts and malformed JSON all end up here.
    const msg = err instanceof Error ? err.message : String(err);
    console.log("[models] Gemini CLI model fetch error:", msg);
    return NextResponse.json(
      { error: "Failed to fetch Gemini CLI models" },
      { status: 500 }
    );
  }
}
if (isAnthropicCompatibleProvider(provider)) {
let baseUrl = getProviderBaseUrl(connection.providerSpecificData);
if (!baseUrl) {
@@ -0,0 +1,73 @@
import { NextRequest, NextResponse } from "next/server";
import { getSettings, updateSettings } from "@/lib/localDb";
import { isAuthenticated } from "@/shared/utils/apiAuth";
// Settings keys that make up the cache configuration. The GET handler in this
// file iterates over this list to build its response.
const CACHE_CONFIG_KEYS = [
"semanticCacheEnabled",
"semanticCacheMaxSize",
"semanticCacheTTL",
"promptCacheEnabled",
"promptCacheStrategy",
"alwaysPreserveClientCache",
] as const;
// Fallback values the GET handler uses for any cache setting that is
// null/undefined in the stored settings.
const DEFAULTS = {
semanticCacheEnabled: true,
semanticCacheMaxSize: 100,
// NOTE(review): presumably milliseconds (1800000 ms = 30 min) — confirm against the cache implementation.
semanticCacheTTL: 1800000,
promptCacheEnabled: true,
promptCacheStrategy: "auto",
alwaysPreserveClientCache: "auto",
};
/**
 * Return the current cache configuration.
 *
 * Every key in `CACHE_CONFIG_KEYS` is read from the stored settings; keys that
 * are null/undefined there fall back to the matching entry in `DEFAULTS`.
 *
 * @param request - Incoming request, used only for the authentication check.
 * @returns 200 with the configuration object, 401 when unauthenticated,
 *          500 with the stringified error when reading settings fails.
 */
export async function GET(request: NextRequest) {
  const authorized = await isAuthenticated(request);
  if (!authorized) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  try {
    const stored = await getSettings();
    const config = CACHE_CONFIG_KEYS.reduce<Record<string, unknown>>((acc, key) => {
      acc[key] = stored[key] ?? DEFAULTS[key];
      return acc;
    }, {});
    return NextResponse.json(config);
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
/**
 * Update the cache configuration.
 *
 * Only recognised fields with valid values are persisted; anything else in the
 * body is ignored:
 *   - `semanticCacheEnabled`, `promptCacheEnabled`: booleans
 *   - `semanticCacheMaxSize`, `semanticCacheTTL`: numbers greater than 0
 *   - `promptCacheStrategy`: "auto" | "system-only" | "manual"
 *   - `alwaysPreserveClientCache`: "auto" | "always" | "never"
 *
 * @param request - Incoming request whose JSON body carries the new values.
 * @returns 200 `{ ok: true }` on success, 401 when unauthenticated, 400 when
 *          the body is not a JSON object, 500 when persisting fails.
 */
export async function PUT(request: NextRequest) {
  if (!(await isAuthenticated(request))) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  // A malformed or non-object body is a client error: answer 400 instead of
  // letting a parse error or a property access on `null` surface as a 500.
  let parsed: unknown;
  try {
    parsed = await request.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return NextResponse.json({ error: "Request body must be a JSON object" }, { status: 400 });
  }
  const body = parsed as Record<string, unknown>;

  try {
    const updates: Record<string, unknown> = {};

    if (typeof body.semanticCacheEnabled === "boolean") {
      updates.semanticCacheEnabled = body.semanticCacheEnabled;
    }
    if (typeof body.semanticCacheMaxSize === "number" && body.semanticCacheMaxSize > 0) {
      updates.semanticCacheMaxSize = body.semanticCacheMaxSize;
    }
    if (typeof body.semanticCacheTTL === "number" && body.semanticCacheTTL > 0) {
      updates.semanticCacheTTL = body.semanticCacheTTL;
    }
    if (typeof body.promptCacheEnabled === "boolean") {
      updates.promptCacheEnabled = body.promptCacheEnabled;
    }
    if (
      typeof body.promptCacheStrategy === "string" &&
      ["auto", "system-only", "manual"].includes(body.promptCacheStrategy)
    ) {
      updates.promptCacheStrategy = body.promptCacheStrategy;
    }
    if (
      typeof body.alwaysPreserveClientCache === "string" &&
      ["auto", "always", "never"].includes(body.alwaysPreserveClientCache)
    ) {
      updates.alwaysPreserveClientCache = body.alwaysPreserveClientCache;
    }

    await updateSettings(updates);
    return NextResponse.json({ ok: true });
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
+36 -1
View File
@@ -1712,6 +1712,17 @@
"cacheMisses": "Cache Misses",
"hitRate": "Hit Rate",
"cacheEntries": "Cache Entries",
"cacheSettings": "Cache Settings",
"semanticCache": "Semantic Cache",
"maxEntries": "Max Entries",
"ttlMinutes": "TTL (minutes)",
"promptCache": "Prompt Cache",
"strategy": "Strategy",
"preserveClientCache": "Preserve Client Cache",
"enabled": "Enabled",
"loading": "Loading...",
"saving": "Saving...",
"save": "Save",
"circuitBreaker": "Circuit Breaker",
"retryPolicy": "Retry Policy",
"maxRetries": "Max Retries",
@@ -2920,6 +2931,30 @@
"clearSuccess": "Cache cleared. {count} expired entries removed.",
"clearError": "Failed to clear cache.",
"unavailable": "Cache unavailable",
"unavailableDesc": "Could not fetch cache statistics. Make sure the server is running."
"unavailableDesc": "Could not fetch cache statistics. Make sure the server is running.",
"promptCache": "Prompt Cache (Provider-Side)",
"cachedRequests": "Cached Requests",
"cacheHitRate": "Cache Hit Rate",
"cachedTokens": "Cached Tokens",
"cacheCreationTokens": "Cache Creation Tokens",
"byProvider": "Breakdown by Provider",
"provider": "Provider",
"requests": "Requests",
"inputTokens": "Input Tokens",
"cachedTokensCol": "Cached",
"cacheCreation": "Creation",
"trend24h": "Cache Trend (24h)",
"cached": "Cached",
"overview": "Overview",
"entries": "Entries",
"searchEntries": "Search entries...",
"search": "Search",
"loading": "Loading...",
"noEntries": "No cache entries found",
"signature": "Signature",
"model": "Model",
"created": "Created",
"expires": "Expires",
"actions": "Actions"
}
}
+58 -1
View File
@@ -577,9 +577,14 @@ export async function getCacheMetrics() {
cacheCreationTokens: number | null;
}>;
// Calculate tokens saved (cached tokens are reused, not charged at full price)
const tokensSaved = totalsRow?.totalCachedTokens || 0;
const AVG_INPUT_PRICE_PER_MILLION = 3;
const CACHE_DISCOUNT = 0.9;
const estimatedCostSaved =
Math.round((tokensSaved / 1_000_000) * AVG_INPUT_PRICE_PER_MILLION * CACHE_DISCOUNT * 100) /
100;
// Build byProvider object
const byProvider: Record<
string,
@@ -653,6 +658,58 @@ export async function updateCacheMetrics(_metrics: Record<string, unknown>) {
return getCacheMetrics();
}
/**
 * One hourly bucket of the cache trend returned by `getCacheTrend`.
 */
export interface CacheTrendPoint {
// Start of the hour bucket, formatted by SQLite as `%Y-%m-%dT%H:00:00Z`.
timestamp: string;
// Number of usage_history rows in the bucket.
requests: number;
// Rows in the bucket whose cache-read or cache-creation token count is above zero.
cachedRequests: number;
// Sum of tokens_input over the bucket.
inputTokens: number;
// Sum of tokens_cache_read over the bucket.
cachedTokens: number;
// Sum of tokens_cache_creation over the bucket.
cacheCreationTokens: number;
}
/**
 * Aggregate `usage_history` into hourly cache-usage buckets.
 *
 * @param hours - Size of the look-back window in hours. Non-finite, zero or
 *                negative values fall back to the 24-hour default.
 * @returns One point per hour that has at least one row, oldest first. An
 *          empty array is returned when the query fails.
 */
export async function getCacheTrend(hours = 24): Promise<CacheTrendPoint[]> {
  // The value is interpolated into a SQLite date modifier ("-N hours"). NaN
  // (e.g. from parseInt on a bad query parameter) or a negative number would
  // yield a malformed modifier and silently return no rows, so normalise first.
  const DEFAULT_TREND_HOURS = 24;
  const windowHours = Number.isFinite(hours) && hours > 0 ? hours : DEFAULT_TREND_HOURS;

  const db = getDbInstance();
  try {
    const rows = db
      .prepare(
        `
SELECT
strftime('%Y-%m-%dT%H:00:00Z', timestamp) as hour,
COUNT(*) as requests,
SUM(CASE WHEN tokens_cache_read > 0 OR tokens_cache_creation > 0 THEN 1 ELSE 0 END) as cachedRequests,
SUM(tokens_input) as inputTokens,
SUM(tokens_cache_read) as cachedTokens,
SUM(tokens_cache_creation) as cacheCreationTokens
FROM usage_history
WHERE timestamp >= datetime('now', ?)
GROUP BY hour
ORDER BY hour ASC
`
      )
      .all(`-${windowHours} hours`) as Array<{
      hour: string;
      requests: number;
      cachedRequests: number;
      inputTokens: number | null;
      cachedTokens: number | null;
      cacheCreationTokens: number | null;
    }>;

    // SUM() yields NULL when every summed value is NULL; report those as 0.
    return rows.map((r) => ({
      timestamp: r.hour,
      requests: r.requests,
      cachedRequests: r.cachedRequests,
      inputTokens: r.inputTokens ?? 0,
      cachedTokens: r.cachedTokens ?? 0,
      cacheCreationTokens: r.cacheCreationTokens ?? 0,
    }));
  } catch (error) {
    console.error("Failed to fetch cache trend:", error);
    return [];
  }
}
export async function resetCacheMetrics() {
// No-op: cannot delete historical usage data
// Cache metrics are computed from usage_history, so they reflect actual request history
+1
View File
@@ -0,0 +1 @@
export { analyzePrefix, shouldInjectCacheControl } from "./prefixAnalyzer";
+77
View File
@@ -0,0 +1,77 @@
import crypto from "crypto";
// Minimal chat-message shape read by the prefix analysis in this file.
interface Message {
// Speaker role; the analysis treats "system", "tool" and "assistant" specially.
role: string;
// Either plain text or an array of parts (arrays are serialised with JSON.stringify).
content: string | unknown[];
}
// Result of analyzePrefix: describes the leading run of messages treated as a prefix.
interface PrefixAnalysis {
// Index of the last message in the prefix; -1 when there is no prefix.
prefixEndIdx: number;
// SHA-256 hex digest of the prefix text; empty string when no messages were supplied.
prefixHash: string;
// Rough token estimate for the prefix text (character count / 4, rounded up).
prefixTokens: number;
// Classification derived from the role of the last message in the prefix.
prefixType: "system_only" | "system_and_tools" | "system_tools_history";
// Heuristic score assigned by analyzePrefix; 0 when no messages were supplied.
confidence: number;
}
/**
 * Flatten message content to a single string.
 *
 * @param content - Plain text, or an array of content parts.
 * @returns The text unchanged, or the JSON serialisation of the array.
 */
function normalizeContent(content: string | unknown[]): string {
  return typeof content === "string" ? content : JSON.stringify(content);
}
/**
 * Rough token estimate: one token per four characters, rounded up.
 *
 * @param text - Text to measure.
 * @returns Estimated token count (0 for an empty string).
 */
function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
// Analysis returned whenever there is no prefix to describe.
function emptyPrefixAnalysis(): PrefixAnalysis {
  return {
    prefixEndIdx: -1,
    prefixHash: "",
    prefixTokens: 0,
    prefixType: "system_only",
    confidence: 0,
  };
}

/**
 * Identify the leading run of messages that forms a stable, cacheable prefix.
 *
 * Messages are scanned from the start. System messages, tool messages and
 * assistant messages extend the prefix; the first message with any other role
 * ends the scan. The type and confidence reported are those of the last
 * message that extended the prefix.
 *
 * @param messages - Conversation messages in order.
 * @returns The prefix description. When `messages` is empty, not an array, or
 *          does not start with a prefix message, the empty analysis
 *          (`prefixEndIdx: -1`, empty hash, 0 tokens, confidence 0) is returned.
 */
export function analyzePrefix(messages: Message[]): PrefixAnalysis {
  if (!Array.isArray(messages) || messages.length === 0) {
    return emptyPrefixAnalysis();
  }

  let prefixEndIdx = -1;
  let prefixType: PrefixAnalysis["prefixType"] = "system_only";
  let confidence = 0.5;

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    // A missing entry or role is treated as a user turn, which ends the prefix.
    const role = msg?.role || "user";

    if (role === "system") {
      prefixEndIdx = i;
      prefixType = "system_only";
      confidence = 0.9;
    } else if (role === "tool" || (role === "assistant" && Array.isArray(msg.content))) {
      prefixEndIdx = i;
      prefixType = "system_and_tools";
      confidence = 0.8;
    } else if (role === "assistant") {
      prefixEndIdx = i;
      prefixType = "system_tools_history";
      confidence = 0.7;
    } else {
      break;
    }
  }

  // No prefix found: hashing the empty string would hand every prefix-less
  // conversation the same, valid-looking hash. Report "no prefix" consistently
  // with the empty-input case instead.
  if (prefixEndIdx === -1) {
    return emptyPrefixAnalysis();
  }

  const prefixMessages = messages.slice(0, prefixEndIdx + 1);
  const prefixText = prefixMessages.map((m) => normalizeContent(m.content)).join("\n");
  const prefixHash = crypto.createHash("sha256").update(prefixText).digest("hex");
  const prefixTokens = estimateTokens(prefixText);

  return {
    prefixEndIdx,
    prefixHash,
    prefixTokens,
    prefixType,
    confidence,
  };
}
/**
 * Decide whether a prefix is worth marking for provider-side caching.
 *
 * @param analysis - Result of the prefix analysis.
 * @param minTokens - Minimum estimated prefix size, in tokens (default 1024).
 * @returns `true` only when the prefix has at least `minTokens` tokens and a
 *          confidence of at least 0.7.
 */
export function shouldInjectCacheControl(analysis: PrefixAnalysis, minTokens = 1024): boolean {
  const longEnough = analysis.prefixTokens >= minTokens;
  const confidentEnough = analysis.confidence >= 0.7;
  return longEnough && confidentEnough;
}
+67 -18
View File
@@ -29,6 +29,45 @@ function toNumber(value: unknown, fallback = 0): number {
return fallback;
}
/**
 * Create the `cache_metrics` table if it is missing and seed the `hits`,
 * `misses` and `tokens_saved` counters with 0 (existing rows are left alone).
 *
 * Best-effort: any failure, including the DB being unavailable, is ignored.
 */
function ensureCacheMetricsTable() {
  // Executed in order: the table must exist before the counters are seeded.
  const setupStatements = [
    `CREATE TABLE IF NOT EXISTS cache_metrics (
key TEXT PRIMARY KEY,
value INTEGER NOT NULL DEFAULT 0,
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
    `INSERT OR IGNORE INTO cache_metrics (key, value) VALUES ('hits', 0), ('misses', 0), ('tokens_saved', 0)`,
  ];

  try {
    const db = getDbInstance();
    for (const sql of setupStatements) {
      db.prepare(sql).run();
    }
  } catch {
    // DB not available
  }
}
/**
 * Add `amount` to one of the persisted cache counters and refresh its
 * `updated_at` timestamp.
 *
 * @param metric - Counter to bump.
 * @param amount - Value to add (default 1). Zero and non-finite amounts are
 *                 skipped, since they would not change the counter.
 *
 * Best-effort: if the DB is unavailable the increment is dropped.
 */
function incrementMetric(metric: "hits" | "misses" | "tokens_saved", amount = 1) {
  // Cache hits with no recorded token savings call this with 0; avoid a
  // pointless write (and a misleading updated_at bump) in that case.
  if (!Number.isFinite(amount) || amount === 0) {
    return;
  }

  try {
    const db = getDbInstance();
    db.prepare(
      `UPDATE cache_metrics SET value = value + ?, updated_at = datetime('now') WHERE key = ?`
    ).run(amount, metric);
  } catch {
    // DB not available — the increment is dropped; there is no in-memory fallback.
  }
}
/**
 * Read one persisted cache counter.
 *
 * @param metric - Key of the counter in `cache_metrics`.
 * @returns The stored value converted with `toNumber`, or 0 when the row is
 *          missing or the lookup fails.
 */
function getMetricValue(metric: string): number {
  try {
    const row = getDbInstance()
      .prepare(`SELECT value FROM cache_metrics WHERE key = ?`)
      .get(metric);
    if (!row) {
      return 0;
    }
    return toNumber(asRecord(row).value, 0);
  } catch {
    return 0;
  }
}
function getHeaderValue(
headers: { get?: (name: string) => string | null } | Record<string, unknown> | null | undefined,
name: string
@@ -51,7 +90,6 @@ function getHeaderValue(
// ─── Singleton ─────────────────
let memoryCache: LRUCache | null = null;
let stats = { hits: 0, misses: 0, tokensSaved: 0 };
function getMemoryCache() {
if (!memoryCache) {
@@ -60,6 +98,7 @@ function getMemoryCache() {
maxBytes: parseInt(process.env.SEMANTIC_CACHE_MAX_BYTES || String(4 * 1024 * 1024), 10),
defaultTTL: parseInt(process.env.SEMANTIC_CACHE_TTL_MS || "1800000", 10),
});
ensureCacheMetricsTable();
}
return memoryCache;
}
@@ -108,8 +147,8 @@ export function getCachedResponse(signature) {
// 1. Check memory cache
const memResult = getMemoryCache().get(signature);
if (memResult) {
stats.hits++;
stats.tokensSaved += memResult.tokensSaved || 0;
incrementMetric("hits");
incrementMetric("tokens_saved", memResult.tokensSaved || 0);
return memResult.response;
}
@@ -126,7 +165,7 @@ export function getCachedResponse(signature) {
const record = asRecord(row);
const responsePayload = typeof record.response === "string" ? record.response : null;
if (!responsePayload) {
stats.misses++;
incrementMetric("misses");
return null;
}
const parsed = JSON.parse(responsePayload);
@@ -141,15 +180,15 @@ export function getCachedResponse(signature) {
signature
);
stats.hits++;
stats.tokensSaved += tokensSaved;
incrementMetric("hits");
incrementMetric("tokens_saved", tokensSaved);
return parsed;
}
} catch {
// DB not available — fail open
}
stats.misses++;
incrementMetric("misses");
return null;
}
@@ -280,6 +319,17 @@ export function stopAutoCleanup(): void {
}
}
/**
 * Delete `semantic_cache` rows whose `created_at` is older than the retention
 * window. (Despite the name, this prunes cache entries; the `cache_metrics`
 * counters are not touched.)
 *
 * @param retentionDays - Age limit in days (default 90). Negative or
 *                        non-finite values are rejected: a negative window
 *                        would place the cutoff in the future and wipe every row.
 * @returns Number of rows deleted; 0 when the input is rejected or the DB is
 *          unavailable.
 */
export function cleanOldMetrics(retentionDays = 90): number {
  if (!Number.isFinite(retentionDays) || retentionDays < 0) {
    return 0;
  }

  const MS_PER_DAY = 86400000;
  try {
    const db = getDbInstance();
    const cutoff = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
    const result = db.prepare("DELETE FROM semantic_cache WHERE created_at < ?").run(cutoff);
    return result.changes || 0;
  } catch {
    // DB not available (or the cutoff could not be computed) — nothing removed.
    return 0;
  }
}
/**
* Clear all cache entries.
*/
@@ -288,17 +338,12 @@ export function clearCache() {
try {
const db = getDbInstance();
db.prepare("DELETE FROM semantic_cache").run();
db.prepare("UPDATE cache_metrics SET value = 0").run();
} catch {
// DB not available
}
stats = { hits: 0, misses: 0, tokensSaved: 0 };
}
// ─── Stats ─────────────────
/**
* Get cache statistics.
*/
export function getCacheStats() {
const memStats = getMemoryCache().getStats();
let dbSize = 0;
@@ -312,14 +357,18 @@ export function getCacheStats() {
// DB not available
}
const total = stats.hits + stats.misses;
const hits = getMetricValue("hits");
const misses = getMetricValue("misses");
const tokensSaved = getMetricValue("tokens_saved");
const total = hits + misses;
return {
memoryEntries: memStats.size,
dbEntries: dbSize,
hits: stats.hits,
misses: stats.misses,
hitRate: total > 0 ? ((stats.hits / total) * 100).toFixed(1) : "0.0",
tokensSaved: stats.tokensSaved,
hits,
misses,
hitRate: total > 0 ? ((hits / total) * 100).toFixed(1) : "0.0",
tokensSaved,
};
}
+1
View File
@@ -656,6 +656,7 @@ export const ID_TO_ALIAS = Object.values(AI_PROVIDERS).reduce((acc, p) => {
// Providers that support usage/quota API
export const USAGE_SUPPORTED_PROVIDERS = [
"antigravity",
"gemini-cli",
"kiro",
"github",
"codex",