Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| df23162e9d | |||
| 2c12f18b44 | |||
| eaeb28b4e1 | |||
| d5647eab33 | |||
| 89eb8885b1 | |||
| a5dc5687f8 | |||
| 6780485051 | |||
| d043e7a242 | |||
| c5d9b5f51d | |||
| 35e2892b98 | |||
| 11dfdbb7a3 |
@@ -3,6 +3,22 @@
|
||||
## [Unreleased]
|
||||
|
||||
---
|
||||
|
||||
## [3.3.5] - 2026-03-30
|
||||
|
||||
### ✨ New Features
|
||||
|
||||
- **Gemini Quota Tracking:** Added real-time Gemini CLI quota tracking via the `retrieveUserQuota` API (PR #825)
|
||||
- **Cache Dashboard:** Enhanced the Cache Dashboard to display prompt cache metrics, 24h trends, and estimated cost savings (PR #824)
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Token Accounting:** Prompt cache tokens are now safely included in historical input-usage calculations so that quota deductions are correct (PR #822)
|
||||
- **User Experience:** Removed the intrusive OAuth modal that kept auto-opening in a loop on empty provider detail pages (PR #820)
|
||||
- **Dependency Updates:** Bumped and locked down dependencies for development and production trees including Next.js 16.2.1, Recharts, and TailwindCSS 4.2.2 (PR #826, #827)
|
||||
|
||||
---
|
||||
|
||||
## [3.3.4] - 2026-03-30
|
||||
|
||||
### ✨ New Features
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
openapi: 3.1.0
|
||||
info:
|
||||
title: OmniRoute API
|
||||
version: 3.3.4
|
||||
version: 3.3.5
|
||||
description: |
|
||||
OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
|
||||
endpoint that routes requests to multiple AI providers with load balancing,
|
||||
|
||||
@@ -226,23 +226,18 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
oauth: {
|
||||
clientIdEnv: "GEMINI_CLI_OAUTH_CLIENT_ID",
|
||||
clientIdDefault: "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com",
|
||||
clientSecretEnv: "GEMINI_CLI_OAUTH_CLIENT_SECRET",
|
||||
clientSecretEnv: "GEMINI_OAUTH_CLIENT_SECRET",
|
||||
clientSecretDefault: "",
|
||||
},
|
||||
models: [
|
||||
{ id: "gemini-3.1-pro-high", name: "Gemini 3.1 Pro High" },
|
||||
{ id: "gemini-3.1-pro-low", name: "Gemini 3.1 Pro Low" },
|
||||
{ id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" },
|
||||
{ id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" },
|
||||
{ id: "gemini-3-pro-preview", name: "Gemini 3 Pro Preview" },
|
||||
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
|
||||
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
|
||||
{ id: "gemini-3.1-pro-preview-customtools", name: "Gemini 3.1 Pro Preview Custom Tools" },
|
||||
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
|
||||
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
|
||||
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
|
||||
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
|
||||
{ id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
|
||||
{ id: "gemini-2.0-flash", name: "Gemini 2.0 Flash" },
|
||||
{ id: "gemini-1.5-pro", name: "Gemini 1.5 Pro" },
|
||||
{ id: "gemini-1.5-flash", name: "Gemini 1.5 Flash" },
|
||||
],
|
||||
},
|
||||
|
||||
|
||||
@@ -60,6 +60,12 @@ export {
|
||||
getSessionSnapshotInput,
|
||||
getSessionSnapshotOutput,
|
||||
getSessionSnapshotTool,
|
||||
cacheStatsInput,
|
||||
cacheStatsOutput,
|
||||
cacheStatsTool,
|
||||
cacheFlushInput,
|
||||
cacheFlushOutput,
|
||||
cacheFlushTool,
|
||||
} from "./tools.ts";
|
||||
|
||||
// A2A schemas
|
||||
|
||||
@@ -806,11 +806,73 @@ export const syncPricingTool: McpToolDefinition<typeof syncPricingInput, typeof
|
||||
sourceEndpoints: ["/api/pricing/sync"],
|
||||
};
|
||||
|
||||
// ============ Cache Tools ============
|
||||
|
||||
export const cacheStatsInput = z.object({}).describe("No parameters required");
|
||||
|
||||
export const cacheStatsOutput = z.object({
|
||||
semanticCache: z.object({
|
||||
memoryEntries: z.number(),
|
||||
dbEntries: z.number(),
|
||||
hits: z.number(),
|
||||
misses: z.number(),
|
||||
hitRate: z.string(),
|
||||
tokensSaved: z.number(),
|
||||
}),
|
||||
promptCache: z
|
||||
.object({
|
||||
totalRequests: z.number(),
|
||||
requestsWithCacheControl: z.number(),
|
||||
totalCachedTokens: z.number(),
|
||||
totalCacheCreationTokens: z.number(),
|
||||
estimatedCostSaved: z.number(),
|
||||
})
|
||||
.nullable(),
|
||||
idempotency: z.object({
|
||||
activeKeys: z.number(),
|
||||
windowMs: z.number(),
|
||||
}),
|
||||
});
|
||||
|
||||
export const cacheStatsTool: McpToolDefinition<typeof cacheStatsInput, typeof cacheStatsOutput> = {
|
||||
name: "omniroute_cache_stats",
|
||||
description:
|
||||
"Returns cache statistics including semantic cache hit rate, prompt cache metrics by provider, and idempotency layer stats.",
|
||||
inputSchema: cacheStatsInput,
|
||||
outputSchema: cacheStatsOutput,
|
||||
scopes: ["read:cache"],
|
||||
auditLevel: "basic",
|
||||
phase: 2,
|
||||
sourceEndpoints: ["/api/cache"],
|
||||
};
|
||||
|
||||
export const cacheFlushInput = z.object({
|
||||
signature: z.string().optional().describe("Specific cache signature to invalidate"),
|
||||
model: z.string().optional().describe("Invalidate all entries for a specific model"),
|
||||
});
|
||||
|
||||
export const cacheFlushOutput = z.object({
|
||||
ok: z.boolean(),
|
||||
invalidated: z.number().optional(),
|
||||
scope: z.string().optional(),
|
||||
});
|
||||
|
||||
export const cacheFlushTool: McpToolDefinition<typeof cacheFlushInput, typeof cacheFlushOutput> = {
|
||||
name: "omniroute_cache_flush",
|
||||
description:
|
||||
"Flush cache entries. Provide signature to invalidate a single entry, model to invalidate all entries for a model, or omit both to clear all.",
|
||||
inputSchema: cacheFlushInput,
|
||||
outputSchema: cacheFlushOutput,
|
||||
scopes: ["write:cache"],
|
||||
auditLevel: "full",
|
||||
phase: 2,
|
||||
sourceEndpoints: ["/api/cache"],
|
||||
};
|
||||
|
||||
// ============ Tool Registry ============
|
||||
|
||||
/** All MCP tool definitions, ordered by phase then name */
|
||||
export const MCP_TOOLS = [
|
||||
// Phase 1: Essential
|
||||
getHealthTool,
|
||||
listCombosTool,
|
||||
getComboMetricsTool,
|
||||
@@ -819,7 +881,6 @@ export const MCP_TOOLS = [
|
||||
routeRequestTool,
|
||||
costReportTool,
|
||||
listModelsCatalogTool,
|
||||
// Phase 2: Advanced
|
||||
simulateRouteTool,
|
||||
setBudgetGuardTool,
|
||||
setRoutingStrategyTool,
|
||||
@@ -830,6 +891,8 @@ export const MCP_TOOLS = [
|
||||
explainRouteTool,
|
||||
getSessionSnapshotTool,
|
||||
syncPricingTool,
|
||||
cacheStatsTool,
|
||||
cacheFlushTool,
|
||||
] as const;
|
||||
|
||||
/** Essential tools only (Phase 1) */
|
||||
|
||||
+172
-27
@@ -159,13 +159,13 @@ async function getGlmUsage(apiKey: string, providerSpecificData?: Record<string,
|
||||
* @returns {Promise<unknown>} Usage data with quotas
|
||||
*/
|
||||
export async function getUsageForProvider(connection) {
|
||||
const { provider, accessToken, apiKey, providerSpecificData } = connection;
|
||||
const { provider, accessToken, apiKey, providerSpecificData, projectId } = connection;
|
||||
|
||||
switch (provider) {
|
||||
case "github":
|
||||
return await getGitHubUsage(accessToken, providerSpecificData);
|
||||
case "gemini-cli":
|
||||
return await getGeminiUsage(accessToken);
|
||||
return await getGeminiUsage(accessToken, providerSpecificData, projectId);
|
||||
case "antigravity":
|
||||
return await getAntigravityUsage(accessToken, undefined);
|
||||
case "claude":
|
||||
@@ -195,24 +195,22 @@ function parseResetTime(resetValue) {
|
||||
if (!resetValue) return null;
|
||||
|
||||
try {
|
||||
// If it's already a Date object
|
||||
let date;
|
||||
if (resetValue instanceof Date) {
|
||||
return resetValue.toISOString();
|
||||
date = resetValue;
|
||||
} else if (typeof resetValue === "number") {
|
||||
date = new Date(resetValue);
|
||||
} else if (typeof resetValue === "string") {
|
||||
date = new Date(resetValue);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
// If it's a number (Unix timestamp in milliseconds)
|
||||
if (typeof resetValue === "number") {
|
||||
return new Date(resetValue).toISOString();
|
||||
}
|
||||
// Epoch-zero (1970-01-01) means no scheduled reset — treat as null
|
||||
if (date.getTime() <= 0) return null;
|
||||
|
||||
// If it's a string (ISO date or parseable date string)
|
||||
if (typeof resetValue === "string") {
|
||||
return new Date(resetValue).toISOString();
|
||||
}
|
||||
|
||||
return null;
|
||||
return date.toISOString();
|
||||
} catch (error) {
|
||||
console.warn(`Failed to parse reset time: ${resetValue}`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -417,36 +415,183 @@ function inferGitHubPlanName(data: JsonRecord, premiumQuota: UsageQuota | null):
|
||||
return "GitHub Copilot";
|
||||
}
|
||||
|
||||
// ── Gemini CLI subscription info cache ──────────────────────────────────────
|
||||
// Prevents duplicate loadCodeAssist calls within the same quota cycle.
|
||||
// Key: accessToken → { data, fetchedAt }
|
||||
const _geminiCliSubCache = new Map();
|
||||
const GEMINI_CLI_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
|
||||
|
||||
/**
|
||||
* Gemini CLI Usage (Google Cloud)
|
||||
* Gemini CLI Usage — fetch per-model quota from Cloud Code Assist API.
|
||||
* Gemini CLI and Antigravity share the same upstream (cloudcode-pa.googleapis.com),
|
||||
* so this follows the same pattern as getAntigravityUsage().
|
||||
*/
|
||||
async function getGeminiUsage(accessToken) {
|
||||
async function getGeminiUsage(accessToken, providerSpecificData?, connectionProjectId?) {
|
||||
if (!accessToken) {
|
||||
return { plan: "Free", message: "Gemini CLI access token not available." };
|
||||
}
|
||||
|
||||
try {
|
||||
// Gemini CLI uses Google Cloud quotas
|
||||
// Try to get quota info from Cloud Resource Manager
|
||||
const subscriptionInfo = await getGeminiCliSubscriptionInfoCached(accessToken);
|
||||
const projectId =
|
||||
connectionProjectId ||
|
||||
providerSpecificData?.projectId ||
|
||||
subscriptionInfo?.cloudaicompanionProject ||
|
||||
null;
|
||||
|
||||
const plan = getGeminiCliPlanLabel(subscriptionInfo);
|
||||
|
||||
if (!projectId) {
|
||||
return { plan, message: "Gemini CLI project ID not available." };
|
||||
}
|
||||
|
||||
// Use retrieveUserQuota (same endpoint as Gemini CLI /stats command).
|
||||
// Returns per-model buckets with remainingFraction and resetTime.
|
||||
const response = await fetch(
|
||||
"https://cloudresourcemanager.googleapis.com/v1/projects?filter=lifecycleState:ACTIVE",
|
||||
"https://cloudcode-pa.googleapis.com/v1internal:retrieveUserQuota",
|
||||
{
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
Accept: "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({ project: projectId }),
|
||||
signal: AbortSignal.timeout(10000),
|
||||
}
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
// Quota API may not be accessible, return generic message
|
||||
return {
|
||||
message: "Gemini CLI uses Google Cloud quotas. Check Google Cloud Console for details.",
|
||||
};
|
||||
return { plan, message: `Gemini CLI quota error (${response.status}).` };
|
||||
}
|
||||
|
||||
return { message: "Gemini CLI connected. Usage tracked via Google Cloud Console." };
|
||||
const data = await response.json();
|
||||
const quotas: Record<string, UsageQuota> = {};
|
||||
|
||||
if (Array.isArray(data.buckets)) {
|
||||
for (const bucket of data.buckets) {
|
||||
if (!bucket.modelId || bucket.remainingFraction == null) continue;
|
||||
|
||||
const remainingFraction = toNumber(bucket.remainingFraction, 0);
|
||||
const remainingPercentage = remainingFraction * 100;
|
||||
const QUOTA_NORMALIZED_BASE = 1000;
|
||||
const total = QUOTA_NORMALIZED_BASE;
|
||||
const remaining = Math.round(total * remainingFraction);
|
||||
const used = Math.max(0, total - remaining);
|
||||
|
||||
quotas[bucket.modelId] = {
|
||||
used,
|
||||
total,
|
||||
resetAt: parseResetTime(bucket.resetTime),
|
||||
remainingPercentage,
|
||||
unlimited: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return { plan, quotas };
|
||||
} catch (error) {
|
||||
return { message: "Unable to fetch Gemini usage. Check Google Cloud Console." };
|
||||
return { message: `Gemini CLI error: ${(error as Error).message}` };
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get Gemini CLI subscription info, memoized per access token.
 *
 * A successful lookup is reused for GEMINI_CLI_CACHE_TTL_MS so repeated quota
 * refreshes do not re-issue the loadCodeAssist request. Failed lookups are NOT
 * cached: getGeminiCliSubscriptionInfo() signals failure with `null`, and
 * remembering that would keep reporting a missing subscription for the whole
 * TTL after a single transient error.
 *
 * @param accessToken OAuth access token; also used as the cache key.
 * @returns The subscription info object, or `null` when the lookup failed.
 */
async function getGeminiCliSubscriptionInfoCached(accessToken) {
  const now = Date.now();

  // Evict expired entries so tokens that are no longer used (e.g. after a
  // refresh) do not accumulate in the module-level map forever.
  for (const [key, entry] of _geminiCliSubCache) {
    if (now - entry.fetchedAt >= GEMINI_CLI_CACHE_TTL_MS) {
      _geminiCliSubCache.delete(key);
    }
  }

  // Anything still present after eviction is within the TTL.
  const cached = _geminiCliSubCache.get(accessToken);
  if (cached) {
    return cached.data;
  }

  const data = await getGeminiCliSubscriptionInfo(accessToken);
  if (data != null) {
    _geminiCliSubCache.set(accessToken, { data, fetchedAt: Date.now() });
  }
  return data;
}
|
||||
|
||||
/**
 * Fetch Gemini CLI subscription info from the Cloud Code Assist
 * `loadCodeAssist` endpoint.
 *
 * Best-effort: a network error, timeout, non-2xx status or unparsable body all
 * resolve to `null` instead of throwing, so callers can fall back to defaults.
 *
 * @param accessToken OAuth access token sent as a Bearer credential.
 * @returns The parsed JSON response body, or `null` on any failure.
 */
async function getGeminiCliSubscriptionInfo(accessToken: string) {
  try {
    const response = await fetch(
      "https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist",
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${accessToken}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          metadata: {
            ideType: "IDE_UNSPECIFIED",
            platform: "PLATFORM_UNSPECIFIED",
            pluginType: "GEMINI",
          },
        }),
        // Without a timeout a stalled connection would hang the whole usage
        // lookup; an abort lands in the catch below and yields null.
        signal: AbortSignal.timeout(10000),
      }
    );

    if (!response.ok) return null;

    return await response.json();
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Map an upper-cased tier identifier or name to a plan label by keyword.
 *
 * Keywords are checked in a fixed order (ULTRA, PRO, ENTERPRISE,
 * BUSINESS/STANDARD, FREE/INDIVIDUAL), so the first match wins.
 *
 * @param upperText Text to inspect; must already be upper-cased.
 * @param legacyIsFree When true, "LEGACY" is also treated as the Free plan.
 * @returns The plan label, or `null` when no keyword matches.
 */
function matchGeminiCliTierKeyword(upperText: string, legacyIsFree: boolean): string | null {
  if (upperText.includes("ULTRA")) return "Ultra";
  if (upperText.includes("PRO")) return "Pro";
  if (upperText.includes("ENTERPRISE")) return "Enterprise";
  if (upperText.includes("BUSINESS") || upperText.includes("STANDARD")) return "Business";
  if (
    upperText.includes("FREE") ||
    upperText.includes("INDIVIDUAL") ||
    (legacyIsFree && upperText.includes("LEGACY"))
  ) {
    return "Free";
  }
  return null;
}

/**
 * Map Gemini CLI subscription tier to display label (same tiers as Antigravity).
 *
 * Resolution order:
 *  1. id of the default entry in `allowedTiers`, else `currentTier.id`
 *     (keyword match; "LEGACY" counts as Free here);
 *  2. first non-empty string among `currentTier.name`,
 *     `currentTier.displayName`, `subscriptionType`, `tier` (keyword match);
 *  3. "Free" when `currentTier.upgradeSubscriptionType` is set;
 *  4. the tier name itself, capitalised;
 *  5. "Free".
 *
 * The payload is upstream JSON, so malformed parts (null tier entries,
 * non-string ids or names) are skipped instead of raising a TypeError.
 *
 * @param subscriptionInfo Parsed loadCodeAssist response, or null/empty.
 * @returns Display label such as "Free", "Pro", "Ultra", "Business" or "Enterprise".
 */
function getGeminiCliPlanLabel(subscriptionInfo): string {
  if (!subscriptionInfo || Object.keys(subscriptionInfo).length === 0) return "Free";

  const currentTier = subscriptionInfo.currentTier;

  let tierId = "";
  if (Array.isArray(subscriptionInfo.allowedTiers)) {
    const defaultTier = subscriptionInfo.allowedTiers.find(
      (tier) => tier?.isDefault && typeof tier.id === "string" && tier.id
    );
    if (defaultTier) {
      tierId = defaultTier.id.trim().toUpperCase();
    }
  }

  if (!tierId && typeof currentTier?.id === "string") {
    tierId = currentTier.id.trim().toUpperCase();
  }

  const labelFromId = tierId ? matchGeminiCliTierKeyword(tierId, true) : null;
  if (labelFromId) return labelFromId;

  const tierName =
    [
      currentTier?.name,
      currentTier?.displayName,
      subscriptionInfo.subscriptionType,
      subscriptionInfo.tier,
    ].find((candidate) => typeof candidate === "string" && candidate !== "") ?? "";

  const labelFromName = matchGeminiCliTierKeyword(tierName.toUpperCase(), false);
  if (labelFromName) return labelFromName;

  // An upgrade offer on the current tier is treated as the Free plan.
  if (currentTier?.upgradeSubscriptionType) return "Free";

  if (tierName) {
    return tierName.charAt(0).toUpperCase() + tierName.slice(1).toLowerCase();
  }

  return "Free";
}
|
||||
|
||||
// ── Antigravity subscription info cache ──────────────────────────────────────
|
||||
// Prevents duplicate loadCodeAssist calls within the same quota cycle.
|
||||
// Key: truncated accessToken → { data, fetchedAt }
|
||||
|
||||
@@ -72,12 +72,7 @@ const DETERMINISTIC_STRATEGIES: Set<RoutingStrategyValue> = new Set(["priority",
|
||||
/**
|
||||
* Providers that support prompt caching
|
||||
*/
|
||||
const CACHING_PROVIDERS = new Set([
|
||||
"claude",
|
||||
"anthropic",
|
||||
"zai",
|
||||
"qwen", // Alibaba Qwen Coding Plan International
|
||||
]);
|
||||
const CACHING_PROVIDERS = new Set(["claude", "anthropic", "zai", "qwen", "deepseek"]);
|
||||
|
||||
/**
|
||||
* Detect if the client is Claude Code or another caching-aware client
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "omniroute",
|
||||
"version": "3.3.4",
|
||||
"version": "3.3.5",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "omniroute",
|
||||
"version": "3.3.4",
|
||||
"version": "3.3.5",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"workspaces": [
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "omniroute",
|
||||
"version": "3.3.4",
|
||||
"version": "3.3.5",
|
||||
"description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import { Card } from "@/shared/components";
|
||||
import { useTranslations } from "next-intl";
|
||||
|
||||
/**
 * Presentation classes and label for each diversity band.
 *
 * Class names are written out as complete literals: Tailwind only generates
 * classes it can find verbatim in the source, so names assembled at runtime
 * with string replacement would not be emitted.
 */
const DIVERSITY_TONES = {
  healthy: {
    text: "text-green-500",
    bar: "bg-green-500",
    badge: "border-green-500/20 bg-green-500/10",
    label: "Healthy Distribution",
  },
  moderate: {
    text: "text-amber-500",
    bar: "bg-amber-500",
    badge: "border-amber-500/20 bg-amber-500/10",
    label: "Moderate Distribution",
  },
  risk: {
    text: "text-red-500",
    bar: "bg-red-500",
    badge: "border-red-500/20 bg-red-500/10",
    label: "High Vendor Lock-in Risk",
  },
} as const;

/** Convert a 0..1 fraction to a whole percentage clamped to 0..100 (non-numeric input → 0). */
function toDiversityPercent(fraction: unknown): number {
  const percent = Math.round((Number(fraction) || 0) * 100);
  return Math.min(100, Math.max(0, percent));
}

/**
 * Analytics card that loads `/api/analytics/diversity` once on mount and
 * renders the overall diversity score (as a percentage and a donut gauge)
 * together with the four providers that have the largest share.
 *
 * States: spinner while the request is in flight, a short notice when it
 * failed, the full card otherwise.
 */
export default function DiversityScoreCard() {
  const [data, setData] = useState<any>(null);
  const [loading, setLoading] = useState(true);
  // NOTE(review): `t` is not used yet — every label below is hard-coded
  // English. Wire them to translation keys once those exist.
  const t = useTranslations("analytics");

  useEffect(() => {
    // Abort on unmount so a late response cannot update an unmounted component.
    const controller = new AbortController();

    fetch("/api/analytics/diversity", { signal: controller.signal })
      .then((res) => {
        // Do not treat an error payload as diversity data.
        if (!res.ok) throw new Error(`Diversity request failed (${res.status})`);
        return res.json();
      })
      .then((json) => {
        setData(json);
        setLoading(false);
      })
      .catch((err) => {
        if (controller.signal.aborted) return;
        console.error(err);
        setLoading(false);
      });

    return () => controller.abort();
  }, []);

  if (loading) {
    return (
      <Card className="p-5 flex flex-col justify-center items-center h-full min-h-[200px]">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary"></div>
      </Card>
    );
  }

  // The request finished without usable data: show a notice instead of
  // spinning forever.
  if (!data) {
    return (
      <Card className="p-5 flex flex-col justify-center items-center h-full min-h-[200px]">
        <span className="text-sm text-[var(--text-muted,#aaaaaa)]">
          Diversity data unavailable.
        </span>
      </Card>
    );
  }

  const scorePercentage = toDiversityPercent(data.score);

  let tone: (typeof DIVERSITY_TONES)[keyof typeof DIVERSITY_TONES] = DIVERSITY_TONES.healthy;
  if (scorePercentage < 40) {
    tone = DIVERSITY_TONES.risk;
  } else if (scorePercentage < 70) {
    tone = DIVERSITY_TONES.moderate;
  }

  // Top 4 providers by share, largest first.
  const topProviders = Object.entries(data.providers || {})
    .map(([provider, stat]: [string, any]) => ({
      provider,
      share: Number(stat?.share) || 0,
    }))
    .sort((a, b) => b.share - a.share)
    .slice(0, 4);

  const windowMinutes = Math.round((Number(data.ttlMs) || 0) / 60000);

  return (
    <Card className="p-5 flex flex-col h-full bg-[var(--card-bg,#1e1e2e)] relative overflow-hidden group">
      <div className="flex items-center gap-2 mb-4">
        <span className="material-symbols-outlined text-[20px] text-cyan-400">pie_chart</span>
        <h3 className="font-semibold text-[var(--text-primary,#fff)] flex-1">
          Provider Diversity Score
        </h3>
        <span className={`text-xs px-2 py-0.5 rounded-md border ${tone.badge} ${tone.text}`}>
          Shannon Entropy
        </span>
      </div>

      <div className="flex items-center justify-between mt-2 mb-6">
        <div className="flex flex-col">
          <span className={`text-4xl font-bold tabular-nums tracking-tight ${tone.text}`}>
            {scorePercentage}%
          </span>
          <span className="text-sm text-[var(--text-muted,#aaaaaa)] mt-1">{tone.label}</span>
        </div>

        {/* SVG donut: the circle path has circumference 100, so the dash length is the percentage */}
        <div className="relative w-20 h-20 flex-shrink-0">
          <svg className="w-full h-full transform -rotate-90" viewBox="0 0 36 36">
            <path
              className="text-[var(--border,#333)]"
              strokeWidth="4"
              stroke="currentColor"
              fill="none"
              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
            />
            <path
              className={tone.text}
              strokeWidth="4"
              strokeDasharray={`${scorePercentage}, 100`}
              stroke="currentColor"
              fill="none"
              strokeLinecap="round"
              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
            />
          </svg>
        </div>
      </div>

      <div className="space-y-4 flex-1">
        <p className="text-xs uppercase tracking-wider font-semibold text-[var(--text-muted,#888)]">
          Provider Share
        </p>

        {topProviders.length === 0 ? (
          <div className="text-sm text-[var(--text-secondary,#666)] py-2">
            No recent usage data available.
          </div>
        ) : (
          <div className="space-y-3">
            {topProviders.map(({ provider, share }) => {
              const sharePercent = toDiversityPercent(share);
              return (
                <div key={provider} className="flex flex-col gap-1.5">
                  <div className="flex items-center justify-between text-sm">
                    <span className="font-medium text-[var(--text-primary,#ddd)] capitalize">
                      {provider}
                    </span>
                    <span className="font-mono text-[var(--text-muted,#aaa)]">{sharePercent}%</span>
                  </div>
                  <div className="w-full h-1.5 bg-[var(--surface,#333)] rounded-full overflow-hidden">
                    <div
                      className={`h-full ${tone.bar} rounded-full`}
                      style={{ width: `${sharePercent}%` }}
                    />
                  </div>
                </div>
              );
            })}
          </div>
        )}
      </div>

      <div className="mt-4 pt-4 border-t border-[var(--border,#333)] flex justify-between text-[11px] text-[var(--text-muted,#777)]">
        <span>Window: {data.windowSize ?? 0} reqs</span>
        <span>Based on Last {windowMinutes} mins</span>
      </div>
    </Card>
  );
}
|
||||
@@ -4,6 +4,7 @@ import { useState, Suspense } from "react";
|
||||
import { UsageAnalytics, CardSkeleton, SegmentedControl } from "@/shared/components";
|
||||
import EvalsTab from "../usage/components/EvalsTab";
|
||||
import SearchAnalyticsTab from "./SearchAnalyticsTab";
|
||||
import DiversityScoreCard from "./components/DiversityScoreCard";
|
||||
import { useTranslations } from "next-intl";
|
||||
|
||||
export default function AnalyticsPage() {
|
||||
@@ -38,9 +39,14 @@ export default function AnalyticsPage() {
|
||||
/>
|
||||
|
||||
{activeTab === "overview" && (
|
||||
<Suspense fallback={<CardSkeleton />}>
|
||||
<UsageAnalytics />
|
||||
</Suspense>
|
||||
<div className="flex flex-col gap-6">
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
<DiversityScoreCard />
|
||||
</div>
|
||||
<Suspense fallback={<CardSkeleton />}>
|
||||
<UsageAnalytics />
|
||||
</Suspense>
|
||||
</div>
|
||||
)}
|
||||
{activeTab === "evals" && <EvalsTab />}
|
||||
{activeTab === "search" && <SearchAnalyticsTab />}
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect, useCallback } from "react";
|
||||
import { Button } from "@/shared/components";
|
||||
import { useTranslations } from "next-intl";
|
||||
|
||||
interface CacheEntry {
|
||||
id: string;
|
||||
signature: string;
|
||||
model: string;
|
||||
hit_count: number;
|
||||
tokens_saved: number;
|
||||
created_at: string;
|
||||
expires_at: string;
|
||||
}
|
||||
|
||||
interface Pagination {
|
||||
page: number;
|
||||
limit: number;
|
||||
total: number;
|
||||
totalPages: number;
|
||||
}
|
||||
|
||||
/**
 * "Entries" tab of the cache page: a searchable, paginated table of cache
 * entries loaded from `/api/cache/entries`, with per-row deletion.
 *
 * The text in the search box is only a draft; it is applied (and a request is
 * sent) when the user presses Enter or clicks the search button, so typing
 * does not fire one request per keystroke.
 */
export default function CacheEntriesTab() {
  const t = useTranslations("cache");
  const [entries, setEntries] = useState<CacheEntry[]>([]);
  const [pagination, setPagination] = useState<Pagination>({
    page: 1,
    limit: 20,
    total: 0,
    totalPages: 0,
  });
  const [loading, setLoading] = useState(true);
  // `search` is the draft in the input; `appliedSearch` is what was last submitted.
  const [search, setSearch] = useState("");
  const [appliedSearch, setAppliedSearch] = useState("");
  const [deleting, setDeleting] = useState<string | null>(null);

  /** Load one page of entries for the applied search term. Never rejects. */
  const fetchEntries = useCallback(
    async (page = 1) => {
      setLoading(true);
      try {
        const params = new URLSearchParams({ page: String(page), limit: String(pagination.limit) });
        if (appliedSearch) params.set("search", appliedSearch);

        const res = await fetch(`/api/cache/entries?${params}`);
        if (!res.ok) {
          console.error(`[CacheEntriesTab] Failed to load cache entries (${res.status})`);
          return;
        }

        const data = await res.json();
        setEntries(Array.isArray(data?.entries) ? data.entries : []);
        if (data?.pagination) setPagination(data.pagination);
      } catch (error) {
        console.error("[CacheEntriesTab] Failed to load cache entries:", error);
      } finally {
        setLoading(false);
      }
    },
    [appliedSearch, pagination.limit]
  );

  // Initial load, and reload from page 1 whenever the applied search changes.
  useEffect(() => {
    void fetchEntries();
  }, [fetchEntries]);

  /** Apply the draft search term; re-submitting the same term acts as a refresh. */
  const submitSearch = () => {
    if (search === appliedSearch) {
      void fetchEntries();
    } else {
      // Changing the applied term re-creates fetchEntries, which re-runs the effect.
      setAppliedSearch(search);
    }
  };

  const handleDelete = async (signature: string) => {
    setDeleting(signature);
    try {
      const res = await fetch(`/api/cache/entries?signature=${encodeURIComponent(signature)}`, {
        method: "DELETE",
      });
      if (!res.ok) {
        console.error(`[CacheEntriesTab] Failed to delete cache entry (${res.status})`);
        return;
      }

      // Removing the only row of a later page would leave an empty page behind;
      // step back one page in that case.
      const pageEmptied = entries.length === 1 && pagination.page > 1;
      await fetchEntries(pageEmptied ? pagination.page - 1 : pagination.page);
    } catch (error) {
      console.error("[CacheEntriesTab] Failed to delete cache entry:", error);
    } finally {
      setDeleting(null);
    }
  };

  const formatDate = (dateStr: string) => {
    return new Date(dateStr).toLocaleString();
  };

  return (
    <div className="flex flex-col gap-4">
      <div className="flex items-center gap-3">
        <input
          type="text"
          placeholder={t("searchEntries")}
          value={search}
          onChange={(e) => setSearch(e.target.value)}
          onKeyDown={(e) => e.key === "Enter" && submitSearch()}
          className="flex-1 px-3 py-2 text-sm rounded-lg border border-border bg-surface text-text-main placeholder:text-text-muted"
        />
        <Button variant="secondary" size="sm" onClick={submitSearch}>
          {t("search")}
        </Button>
      </div>

      {loading ? (
        <div className="text-sm text-text-muted">{t("loading")}</div>
      ) : entries.length === 0 ? (
        <div className="text-sm text-text-muted text-center py-8">{t("noEntries")}</div>
      ) : (
        <>
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="text-left text-xs text-text-muted border-b border-border/30">
                  <th className="pb-2 pr-4">{t("signature")}</th>
                  <th className="pb-2 pr-4">{t("model")}</th>
                  <th className="pb-2 pr-4">{t("hits")}</th>
                  <th className="pb-2 pr-4">{t("tokensSaved")}</th>
                  <th className="pb-2 pr-4">{t("created")}</th>
                  <th className="pb-2 pr-4">{t("expires")}</th>
                  <th className="pb-2">{t("actions")}</th>
                </tr>
              </thead>
              <tbody>
                {entries.map((entry) => (
                  <tr key={entry.id} className="border-b border-border/20">
                    <td className="py-2 pr-4 font-mono text-xs">
                      {entry.signature.slice(0, 12)}...
                    </td>
                    <td className="py-2 pr-4">{entry.model}</td>
                    <td className="py-2 pr-4 tabular-nums">{entry.hit_count}</td>
                    <td className="py-2 pr-4 tabular-nums text-green-500">
                      {(entry.tokens_saved ?? 0).toLocaleString()}
                    </td>
                    <td className="py-2 pr-4 text-xs text-text-muted">
                      {formatDate(entry.created_at)}
                    </td>
                    <td className="py-2 pr-4 text-xs text-text-muted">
                      {formatDate(entry.expires_at)}
                    </td>
                    <td className="py-2">
                      <button
                        type="button"
                        onClick={() => void handleDelete(entry.signature)}
                        disabled={deleting === entry.signature}
                        className="text-xs text-red-400 hover:text-red-300 disabled:opacity-50"
                      >
                        {deleting === entry.signature ? "..." : "🗑️"}
                      </button>
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>

          {/* Pagination */}
          {pagination.totalPages > 1 && (
            <div className="flex items-center justify-center gap-2 pt-2">
              <Button
                variant="secondary"
                size="sm"
                onClick={() => void fetchEntries(pagination.page - 1)}
                disabled={pagination.page <= 1}
              >
                ←
              </Button>
              <span className="text-sm text-text-muted">
                {pagination.page} / {pagination.totalPages}
              </span>
              <Button
                variant="secondary"
                size="sm"
                onClick={() => void fetchEntries(pagination.page + 1)}
                disabled={pagination.page >= pagination.totalPages}
              >
                →
              </Button>
            </div>
          )}
        </>
      )}
    </div>
  );
}
|
||||
+362
-139
@@ -4,6 +4,7 @@ import { useState, useEffect, useCallback } from "react";
|
||||
import { Card, Button, EmptyState } from "@/shared/components";
|
||||
import { useNotificationStore } from "@/store/notificationStore";
|
||||
import { useTranslations } from "next-intl";
|
||||
import CacheEntriesTab from "./components/CacheEntriesTab";
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -16,13 +17,44 @@ interface SemanticCacheStats {
|
||||
tokensSaved: number;
|
||||
}
|
||||
|
||||
interface PromptCacheProviderStats {
|
||||
requests: number;
|
||||
inputTokens: number;
|
||||
cachedTokens: number;
|
||||
cacheCreationTokens: number;
|
||||
}
|
||||
|
||||
interface PromptCacheMetrics {
|
||||
totalRequests: number;
|
||||
requestsWithCacheControl: number;
|
||||
totalInputTokens: number;
|
||||
totalCachedTokens: number;
|
||||
totalCacheCreationTokens: number;
|
||||
tokensSaved: number;
|
||||
estimatedCostSaved: number;
|
||||
byProvider: Record<string, PromptCacheProviderStats>;
|
||||
byStrategy: Record<string, PromptCacheProviderStats>;
|
||||
lastUpdated: string;
|
||||
}
|
||||
|
||||
interface IdempotencyStats {
|
||||
activeKeys: number;
|
||||
windowMs: number;
|
||||
}
|
||||
|
||||
interface CacheTrendPoint {
|
||||
timestamp: string;
|
||||
requests: number;
|
||||
cachedRequests: number;
|
||||
inputTokens: number;
|
||||
cachedTokens: number;
|
||||
cacheCreationTokens: number;
|
||||
}
|
||||
|
||||
interface CacheStats {
|
||||
semanticCache: SemanticCacheStats;
|
||||
promptCache: PromptCacheMetrics | null;
|
||||
trend: CacheTrendPoint[];
|
||||
idempotency: IdempotencyStats;
|
||||
}
|
||||
|
||||
@@ -107,6 +139,7 @@ export default function CachePage() {
|
||||
const [stats, setStats] = useState<CacheStats | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [clearing, setClearing] = useState(false);
|
||||
const [activeTab, setActiveTab] = useState<"overview" | "entries">("overview");
|
||||
const notify = useNotificationStore();
|
||||
|
||||
const fetchStats = useCallback(async () => {
|
||||
@@ -136,27 +169,32 @@ export default function CachePage() {
|
||||
const res = await fetch("/api/cache", { method: "DELETE" });
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
notify.add({
|
||||
type: "success",
|
||||
message: t("clearSuccess", { count: data.expiredRemoved ?? 0 }),
|
||||
});
|
||||
notify.success(t("clearSuccess", { count: data.expiredRemoved ?? 0 }));
|
||||
await fetchStats();
|
||||
} else {
|
||||
notify.add({ type: "error", message: t("clearError") });
|
||||
notify.error(t("clearError"));
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("[CachePage] Failed to clear cache:", error);
|
||||
notify.add({ type: "error", message: t("clearError") });
|
||||
notify.error(t("clearError"));
|
||||
} finally {
|
||||
setClearing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const sc = stats?.semanticCache;
|
||||
const pc = stats?.promptCache;
|
||||
const trend = stats?.trend ?? [];
|
||||
const idp = stats?.idempotency;
|
||||
const hitRate = sc ? parseFloat(sc.hitRate) : 0;
|
||||
const totalRequests = sc ? sc.hits + sc.misses : 0;
|
||||
|
||||
const promptCacheHitRate =
|
||||
pc && pc.totalRequests > 0 ? (pc.requestsWithCacheControl / pc.totalRequests) * 100 : 0;
|
||||
const providerEntries = pc ? Object.entries(pc.byProvider) : [];
|
||||
|
||||
const maxTrendRequests = Math.max(1, ...trend.map((p) => p.requests));
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-6">
|
||||
{/* Header */}
|
||||
@@ -190,149 +228,334 @@ export default function CachePage() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Loading skeleton */}
|
||||
{loading && (
|
||||
<div
|
||||
className="grid grid-cols-2 md:grid-cols-4 gap-4"
|
||||
aria-busy="true"
|
||||
aria-label="Loading cache statistics"
|
||||
{/* Tab navigation */}
|
||||
<div className="flex gap-1 p-1 rounded-lg bg-black/5 dark:bg-white/5 w-fit">
|
||||
<button
|
||||
onClick={() => setActiveTab("overview")}
|
||||
className={`px-4 py-2 rounded-md text-sm font-medium transition-all ${
|
||||
activeTab === "overview"
|
||||
? "bg-white dark:bg-white/10 text-text-main shadow-sm"
|
||||
: "text-text-muted hover:text-text-main"
|
||||
}`}
|
||||
>
|
||||
{Array.from({ length: 4 }).map((_, i) => (
|
||||
<div key={i} className="h-24 rounded-xl bg-surface-raised animate-pulse" />
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
{t("overview")}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setActiveTab("entries")}
|
||||
className={`px-4 py-2 rounded-md text-sm font-medium transition-all ${
|
||||
activeTab === "entries"
|
||||
? "bg-white dark:bg-white/10 text-text-main shadow-sm"
|
||||
: "text-text-muted hover:text-text-main"
|
||||
}`}
|
||||
>
|
||||
{t("entries")}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Error / empty state */}
|
||||
{!loading && !stats && (
|
||||
<EmptyState
|
||||
icon="cached"
|
||||
title={t("unavailable")}
|
||||
description={t("unavailableDesc")}
|
||||
actionLabel={t("refresh")}
|
||||
onAction={() => void fetchStats()}
|
||||
/>
|
||||
)}
|
||||
{/* Entries tab */}
|
||||
{activeTab === "entries" && <CacheEntriesTab />}
|
||||
|
||||
{/* Main content */}
|
||||
{!loading && stats && (
|
||||
{/* Overview tab content */}
|
||||
{activeTab === "overview" && (
|
||||
<>
|
||||
{/* Stats grid */}
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
|
||||
<StatCard
|
||||
icon="memory"
|
||||
label={t("memoryEntries")}
|
||||
value={sc?.memoryEntries ?? 0}
|
||||
sub={t("memoryEntriesSub")}
|
||||
/>
|
||||
<StatCard
|
||||
icon="storage"
|
||||
label={t("dbEntries")}
|
||||
value={sc?.dbEntries ?? 0}
|
||||
sub={t("dbEntriesSub")}
|
||||
/>
|
||||
<StatCard
|
||||
icon="trending_up"
|
||||
label={t("cacheHits")}
|
||||
value={sc?.hits ?? 0}
|
||||
sub={t("cacheHitsSub", { total: totalRequests })}
|
||||
valueClass="text-green-500"
|
||||
/>
|
||||
<StatCard
|
||||
icon="token"
|
||||
label={t("tokensSaved")}
|
||||
value={(sc?.tokensSaved ?? 0).toLocaleString()}
|
||||
sub={t("tokensSavedSub")}
|
||||
valueClass="text-blue-400"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Hit rate + breakdown */}
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<h2 className="font-medium text-sm">{t("performance")}</h2>
|
||||
<span className="text-xs text-text-muted">
|
||||
{t("autoRefresh", { seconds: REFRESH_INTERVAL_SECONDS })}
|
||||
</span>
|
||||
</div>
|
||||
<HitRateBar hitRate={hitRate} label={t("hitRate")} />
|
||||
<div className="grid grid-cols-3 gap-4 pt-3 border-t border-border/30 text-center">
|
||||
<div>
|
||||
<div className="text-lg font-semibold tabular-nums text-green-500">
|
||||
{sc?.hits ?? 0}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("hits")}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-lg font-semibold tabular-nums text-red-400">
|
||||
{sc?.misses ?? 0}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("misses")}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-lg font-semibold tabular-nums">{totalRequests}</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("total")}</div>
|
||||
</div>
|
||||
</div>
|
||||
{/* Loading skeleton */}
|
||||
{loading && (
|
||||
<div
|
||||
className="grid grid-cols-2 md:grid-cols-4 gap-4"
|
||||
aria-busy="true"
|
||||
aria-label="Loading cache statistics"
|
||||
>
|
||||
{Array.from({ length: 4 }).map((_, i) => (
|
||||
<div key={i} className="h-24 rounded-xl bg-surface-raised animate-pulse" />
|
||||
))}
|
||||
</div>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Cache behavior */}
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-3">
|
||||
<h2 className="font-medium text-sm">{t("behavior")}</h2>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
|
||||
<InfoRow icon="info">{t("behaviorDeterministic")}</InfoRow>
|
||||
<InfoRow icon="info">
|
||||
{t.rich("behaviorBypass", {
|
||||
header: () => (
|
||||
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
|
||||
X-OmniRoute-No-Cache: true
|
||||
</code>
|
||||
),
|
||||
})}
|
||||
</InfoRow>
|
||||
<InfoRow icon="info">{t("behaviorTwoTier")}</InfoRow>
|
||||
<InfoRow icon="info">
|
||||
{t.rich("behaviorTtl", {
|
||||
envVar: () => (
|
||||
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
|
||||
SEMANTIC_CACHE_TTL_MS
|
||||
</code>
|
||||
),
|
||||
})}
|
||||
</InfoRow>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
{/* Error / empty state */}
|
||||
{!loading && !stats && (
|
||||
<EmptyState
|
||||
icon="cached"
|
||||
title={t("unavailable")}
|
||||
description={t("unavailableDesc")}
|
||||
actionLabel={t("refresh")}
|
||||
onAction={() => void fetchStats()}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Idempotency */}
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<span
|
||||
className="material-symbols-outlined text-base text-text-muted"
|
||||
aria-hidden="true"
|
||||
>
|
||||
fingerprint
|
||||
</span>
|
||||
<h2 className="font-medium text-sm">{t("idempotency")}</h2>
|
||||
{/* Main content */}
|
||||
{!loading && stats && (
|
||||
<>
|
||||
{/* Stats grid */}
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
|
||||
<StatCard
|
||||
icon="memory"
|
||||
label={t("memoryEntries")}
|
||||
value={sc?.memoryEntries ?? 0}
|
||||
sub={t("memoryEntriesSub")}
|
||||
/>
|
||||
<StatCard
|
||||
icon="storage"
|
||||
label={t("dbEntries")}
|
||||
value={sc?.dbEntries ?? 0}
|
||||
sub={t("dbEntriesSub")}
|
||||
/>
|
||||
<StatCard
|
||||
icon="trending_up"
|
||||
label={t("cacheHits")}
|
||||
value={sc?.hits ?? 0}
|
||||
sub={t("cacheHitsSub", { total: totalRequests })}
|
||||
valueClass="text-green-500"
|
||||
/>
|
||||
<StatCard
|
||||
icon="token"
|
||||
label={t("tokensSaved")}
|
||||
value={(sc?.tokensSaved ?? 0).toLocaleString()}
|
||||
sub={t("tokensSavedSub")}
|
||||
valueClass="text-blue-400"
|
||||
/>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums">{idp?.activeKeys ?? 0}</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("activeDedupKeys")}</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums">
|
||||
{idp ? `${(idp.windowMs / 1000).toFixed(0)}s` : "—"}
|
||||
|
||||
{/* Hit rate + breakdown */}
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<h2 className="font-medium text-sm">{t("performance")}</h2>
|
||||
<span className="text-xs text-text-muted">
|
||||
{t("autoRefresh", { seconds: REFRESH_INTERVAL_SECONDS })}
|
||||
</span>
|
||||
</div>
|
||||
<HitRateBar hitRate={hitRate} label={t("hitRate")} />
|
||||
<div className="grid grid-cols-3 gap-4 pt-3 border-t border-border/30 text-center">
|
||||
<div>
|
||||
<div className="text-lg font-semibold tabular-nums text-green-500">
|
||||
{sc?.hits ?? 0}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("hits")}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-lg font-semibold tabular-nums text-red-400">
|
||||
{sc?.misses ?? 0}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("misses")}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-lg font-semibold tabular-nums">{totalRequests}</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("total")}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("dedupWindow")}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
</Card>
|
||||
|
||||
{/* Prompt Cache Stats */}
|
||||
{pc && (
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-4">
|
||||
<div className="flex items-center gap-2">
|
||||
<span
|
||||
className="material-symbols-outlined text-base text-text-muted"
|
||||
aria-hidden="true"
|
||||
>
|
||||
bolt
|
||||
</span>
|
||||
<h2 className="font-medium text-sm">{t("promptCache")}</h2>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums">
|
||||
{pc.requestsWithCacheControl.toLocaleString()}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("cachedRequests")}</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums text-green-500">
|
||||
{promptCacheHitRate.toFixed(1)}%
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("cacheHitRate")}</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums text-blue-400">
|
||||
{pc.totalCachedTokens.toLocaleString()}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("cachedTokens")}</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums text-purple-400">
|
||||
{pc.totalCacheCreationTokens.toLocaleString()}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">
|
||||
{t("cacheCreationTokens")}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{providerEntries.length > 0 && (
|
||||
<div className="pt-3 border-t border-border/30">
|
||||
<h3 className="text-xs font-medium text-text-muted mb-3">
|
||||
{t("byProvider")}
|
||||
</h3>
|
||||
<div className="overflow-x-auto">
|
||||
<table className="w-full text-sm">
|
||||
<thead>
|
||||
<tr className="text-left text-xs text-text-muted border-b border-border/30">
|
||||
<th className="pb-2 pr-4">{t("provider")}</th>
|
||||
<th className="pb-2 pr-4">{t("requests")}</th>
|
||||
<th className="pb-2 pr-4">{t("inputTokens")}</th>
|
||||
<th className="pb-2 pr-4">{t("cachedTokensCol")}</th>
|
||||
<th className="pb-2">{t("cacheCreation")}</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{providerEntries.map(([provider, data]) => (
|
||||
<tr key={provider} className="border-b border-border/20">
|
||||
<td className="py-2 pr-4 font-medium">{provider}</td>
|
||||
<td className="py-2 pr-4 tabular-nums">
|
||||
{data.requests.toLocaleString()}
|
||||
</td>
|
||||
<td className="py-2 pr-4 tabular-nums">
|
||||
{data.inputTokens.toLocaleString()}
|
||||
</td>
|
||||
<td className="py-2 pr-4 tabular-nums text-green-500">
|
||||
{data.cachedTokens.toLocaleString()}
|
||||
</td>
|
||||
<td className="py-2 tabular-nums text-purple-400">
|
||||
{data.cacheCreationTokens.toLocaleString()}
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Cache Trend (24h) */}
|
||||
{trend.length > 0 && (
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-4">
|
||||
<div className="flex items-center gap-2">
|
||||
<span
|
||||
className="material-symbols-outlined text-base text-text-muted"
|
||||
aria-hidden="true"
|
||||
>
|
||||
timeline
|
||||
</span>
|
||||
<h2 className="font-medium text-sm">{t("trend24h")}</h2>
|
||||
</div>
|
||||
<div className="flex items-end gap-1 h-32">
|
||||
{trend.map((point) => {
|
||||
const height = Math.max(4, (point.requests / maxTrendRequests) * 100);
|
||||
const cachedHeight =
|
||||
point.requests > 0
|
||||
? Math.max(2, (point.cachedRequests / point.requests) * height)
|
||||
: 0;
|
||||
const hour = new Date(point.timestamp).toLocaleTimeString([], {
|
||||
hour: "2-digit",
|
||||
minute: "2-digit",
|
||||
hour12: false,
|
||||
});
|
||||
return (
|
||||
<div
|
||||
key={point.timestamp}
|
||||
className="flex-1 flex flex-col items-center gap-1 group relative"
|
||||
>
|
||||
<div className="absolute bottom-full mb-1 hidden group-hover:block bg-surface-raised border border-border rounded px-2 py-1 text-xs whitespace-nowrap z-10">
|
||||
{hour}: {point.requests} {t("requests").toLowerCase()},{" "}
|
||||
{point.cachedRequests} {t("cached").toLowerCase()}
|
||||
</div>
|
||||
<div className="w-full flex flex-col justify-end h-full gap-px">
|
||||
<div
|
||||
className="w-full bg-green-500/30 rounded-t"
|
||||
style={{ height: `${cachedHeight}%` }}
|
||||
/>
|
||||
<div
|
||||
className="w-full bg-text-muted/20 rounded-t"
|
||||
style={{ height: `${height - cachedHeight}%` }}
|
||||
/>
|
||||
</div>
|
||||
<span className="text-[10px] text-text-muted truncate w-full text-center">
|
||||
{hour.split(":")[0]}
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
<div className="flex items-center gap-4 text-xs text-text-muted">
|
||||
<div className="flex items-center gap-1.5">
|
||||
<div className="w-3 h-3 rounded bg-text-muted/20" />
|
||||
<span>{t("total")}</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5">
|
||||
<div className="w-3 h-3 rounded bg-green-500/30" />
|
||||
<span>{t("cached")}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Cache behavior */}
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-3">
|
||||
<h2 className="font-medium text-sm">{t("behavior")}</h2>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
|
||||
<InfoRow icon="info">{t("behaviorDeterministic")}</InfoRow>
|
||||
<InfoRow icon="info">
|
||||
{t.rich("behaviorBypass", {
|
||||
header: () => (
|
||||
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
|
||||
X-OmniRoute-No-Cache: true
|
||||
</code>
|
||||
),
|
||||
})}
|
||||
</InfoRow>
|
||||
<InfoRow icon="info">{t("behaviorTwoTier")}</InfoRow>
|
||||
<InfoRow icon="info">
|
||||
{t.rich("behaviorTtl", {
|
||||
envVar: () => (
|
||||
<code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
|
||||
SEMANTIC_CACHE_TTL_MS
|
||||
</code>
|
||||
),
|
||||
})}
|
||||
</InfoRow>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
|
||||
{/* Idempotency */}
|
||||
<Card>
|
||||
<div className="p-5 flex flex-col gap-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<span
|
||||
className="material-symbols-outlined text-base text-text-muted"
|
||||
aria-hidden="true"
|
||||
>
|
||||
fingerprint
|
||||
</span>
|
||||
<h2 className="font-medium text-sm">{t("idempotency")}</h2>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums">
|
||||
{idp?.activeKeys ?? 0}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("activeDedupKeys")}</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-surface/50">
|
||||
<div className="text-lg font-semibold tabular-nums">
|
||||
{idp ? `${(idp.windowMs / 1000).toFixed(0)}s` : "—"}
|
||||
</div>
|
||||
<div className="text-xs text-text-muted mt-0.5">{t("dedupWindow")}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -802,8 +802,6 @@ export default function ProviderDetailPage() {
|
||||
const { copied, copy } = useCopyToClipboard();
|
||||
const t = useTranslations("providers");
|
||||
const notify = useNotificationStore();
|
||||
const hasAutoOpened = useRef(false);
|
||||
const userDismissed = useRef(false);
|
||||
const [proxyTarget, setProxyTarget] = useState(null);
|
||||
const [proxyConfig, setProxyConfig] = useState(null);
|
||||
const [connProxyMap, setConnProxyMap] = useState<
|
||||
@@ -989,25 +987,6 @@ export default function ProviderDetailPage() {
|
||||
}
|
||||
}, [loading, connections, loadConnProxies]);
|
||||
|
||||
// Auto-open Add Connection modal when no connections exist (better UX)
|
||||
// Only fires once on initial load, not on HMR remounts or after user dismissal
|
||||
useEffect(() => {
|
||||
if (
|
||||
!loading &&
|
||||
connections.length === 0 &&
|
||||
providerInfo &&
|
||||
!isCompatible &&
|
||||
!hasAutoOpened.current &&
|
||||
!userDismissed.current
|
||||
) {
|
||||
hasAutoOpened.current = true;
|
||||
if (isOAuth) {
|
||||
setShowOAuthModal(true);
|
||||
} else {
|
||||
setShowAddApiKeyModal(true);
|
||||
}
|
||||
}
|
||||
}, [loading]); // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
const handleSetAlias = async (modelId, alias, providerAliasOverride = providerAlias) => {
|
||||
const fullModel = `${providerAliasOverride}/${modelId}`;
|
||||
@@ -2428,7 +2407,6 @@ export default function ProviderDetailPage() {
|
||||
providerInfo={providerInfo}
|
||||
onSuccess={handleOAuthSuccess}
|
||||
onClose={() => {
|
||||
userDismissed.current = true;
|
||||
setShowOAuthModal(false);
|
||||
}}
|
||||
/>
|
||||
@@ -2437,7 +2415,6 @@ export default function ProviderDetailPage() {
|
||||
isOpen={showOAuthModal}
|
||||
onSuccess={handleOAuthSuccess}
|
||||
onClose={() => {
|
||||
userDismissed.current = true;
|
||||
setShowOAuthModal(false);
|
||||
}}
|
||||
/>
|
||||
@@ -2448,7 +2425,6 @@ export default function ProviderDetailPage() {
|
||||
providerInfo={providerInfo}
|
||||
onSuccess={handleOAuthSuccess}
|
||||
onClose={() => {
|
||||
userDismissed.current = true;
|
||||
setShowOAuthModal(false);
|
||||
}}
|
||||
/>
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { Card, Button } from "@/shared/components";
|
||||
import { useTranslations } from "next-intl";
|
||||
|
||||
/** Cache configuration edited by the settings form and exchanged with `/api/settings/cache-config`. */
interface CacheConfig {
  /** Whether the semantic cache is switched on. */
  semanticCacheEnabled: boolean;
  /** Maximum number of semantic cache entries (the form input allows 1–1000). */
  semanticCacheMaxSize: number;
  /** Semantic cache TTL in milliseconds; the form shows and edits it in minutes. */
  semanticCacheTTL: number;
  /** Whether prompt caching is switched on. */
  promptCacheEnabled: boolean;
  /** Prompt cache strategy selected in the form. */
  promptCacheStrategy: "auto" | "system-only" | "manual";
  /** Policy for preserving the client's own cache markers. */
  alwaysPreserveClientCache: "auto" | "always" | "never";
}
|
||||
|
||||
/**
 * Settings card for the semantic cache and the prompt cache.
 *
 * Loads the current configuration from `/api/settings/cache-config` on mount
 * (falling back to built-in defaults) and writes the edited configuration back
 * with a PUT when the user presses Save.
 */
export default function CacheSettingsTab() {
  const t = useTranslations("settings");
  const [config, setConfig] = useState<CacheConfig>({
    semanticCacheEnabled: true,
    semanticCacheMaxSize: 100,
    semanticCacheTTL: 1800000,
    promptCacheEnabled: true,
    promptCacheStrategy: "auto",
    alwaysPreserveClientCache: "auto",
  });
  const [saving, setSaving] = useState(false);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    fetch("/api/settings/cache-config")
      .then((r) => (r.ok ? r.json() : null))
      .then((data) => {
        // Merge over the defaults so a partial payload cannot leave a field
        // undefined (which would turn controlled inputs into uncontrolled ones).
        if (data) setConfig((c) => ({ ...c, ...data }));
      })
      .catch((error) => {
        // Best effort: keep the defaults, but leave a trace instead of swallowing.
        console.error("[CacheSettingsTab] Failed to load cache config:", error);
      })
      .finally(() => setLoading(false));
  }, []);

  /** Persists the current form state; failures are logged rather than thrown. */
  const handleSave = async () => {
    setSaving(true);
    try {
      const res = await fetch("/api/settings/cache-config", {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(config),
      });
      if (!res.ok) {
        // fetch only rejects on network errors, so HTTP failures must be checked explicitly.
        console.error(`[CacheSettingsTab] Failed to save cache config: HTTP ${res.status}`);
      }
    } catch (error) {
      // Without this catch a network failure surfaces as an unhandled rejection.
      console.error("[CacheSettingsTab] Failed to save cache config:", error);
    } finally {
      setSaving(false);
    }
  };

  if (loading) {
    return (
      <Card className="p-6">
        <p className="text-sm text-text-muted">{t("loading")}</p>
      </Card>
    );
  }

  return (
    <Card className="p-6">
      <h3 className="text-lg font-semibold text-text-main flex items-center gap-2 mb-4">
        <span className="material-symbols-outlined text-[20px]">cached</span>
        {t("cacheSettings")}
      </h3>

      <div className="space-y-6">
        {/* Semantic Cache */}
        <div className="space-y-3">
          <h4 className="text-sm font-medium text-text-main">{t("semanticCache")}</h4>

          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("enabled")}</span>
            {/* type="button" keeps the toggle from submitting an enclosing form */}
            <button
              type="button"
              role="switch"
              aria-checked={config.semanticCacheEnabled}
              onClick={() =>
                setConfig((c) => ({ ...c, semanticCacheEnabled: !c.semanticCacheEnabled }))
              }
              className={`relative w-10 h-5 rounded-full transition-colors ${
                config.semanticCacheEnabled ? "bg-green-500" : "bg-border"
              }`}
            >
              <span
                className={`absolute top-0.5 w-4 h-4 rounded-full bg-white transition-transform ${
                  config.semanticCacheEnabled ? "left-5" : "left-0.5"
                }`}
              />
            </button>
          </label>

          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("maxEntries")}</span>
            <input
              type="number"
              min={1}
              max={1000}
              value={config.semanticCacheMaxSize}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  // Empty or non-numeric input falls back to the default of 100.
                  semanticCacheMaxSize: parseInt(e.target.value, 10) || 100,
                }))
              }
              className="w-24 px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            />
          </label>

          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("ttlMinutes")}</span>
            <input
              type="number"
              min={1}
              max={1440}
              // Stored in milliseconds, edited in minutes.
              value={Math.round(config.semanticCacheTTL / 60000)}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  // Empty or non-numeric input falls back to 30 minutes.
                  semanticCacheTTL: (parseInt(e.target.value, 10) || 30) * 60000,
                }))
              }
              className="w-24 px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            />
          </label>
        </div>

        {/* Prompt Cache */}
        <div className="space-y-3 pt-4 border-t border-border/30">
          <h4 className="text-sm font-medium text-text-main">{t("promptCache")}</h4>

          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("enabled")}</span>
            <button
              type="button"
              role="switch"
              aria-checked={config.promptCacheEnabled}
              onClick={() =>
                setConfig((c) => ({ ...c, promptCacheEnabled: !c.promptCacheEnabled }))
              }
              className={`relative w-10 h-5 rounded-full transition-colors ${
                config.promptCacheEnabled ? "bg-green-500" : "bg-border"
              }`}
            >
              <span
                className={`absolute top-0.5 w-4 h-4 rounded-full bg-white transition-transform ${
                  config.promptCacheEnabled ? "left-5" : "left-0.5"
                }`}
              />
            </button>
          </label>

          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("strategy")}</span>
            <select
              value={config.promptCacheStrategy}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  promptCacheStrategy: e.target.value as CacheConfig["promptCacheStrategy"],
                }))
              }
              className="px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            >
              <option value="auto">Auto</option>
              <option value="system-only">System Only</option>
              <option value="manual">Manual</option>
            </select>
          </label>

          <label className="flex items-center justify-between">
            <span className="text-sm text-text-muted">{t("preserveClientCache")}</span>
            <select
              value={config.alwaysPreserveClientCache}
              onChange={(e) =>
                setConfig((c) => ({
                  ...c,
                  alwaysPreserveClientCache: e.target
                    .value as CacheConfig["alwaysPreserveClientCache"],
                }))
              }
              className="px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
            >
              <option value="auto">Auto</option>
              <option value="always">Always</option>
              <option value="never">Never</option>
            </select>
          </label>
        </div>

        {/* Save */}
        <div className="pt-4 border-t border-border/30">
          <Button onClick={handleSave} disabled={saving} size="sm">
            {saving ? t("saving") : t("save")}
          </Button>
        </div>
      </div>
    </Card>
  );
}
|
||||
@@ -18,6 +18,7 @@ import ModelAliasesTab from "./components/ModelAliasesTab";
|
||||
import BackgroundDegradationTab from "./components/BackgroundDegradationTab";
|
||||
|
||||
import CacheStatsCard from "./components/CacheStatsCard";
|
||||
import CacheSettingsTab from "./components/CacheSettingsTab";
|
||||
import ResilienceTab from "./components/ResilienceTab";
|
||||
|
||||
const tabs = [
|
||||
@@ -89,6 +90,7 @@ export default function SettingsPage() {
|
||||
<CodexServiceTierTab />
|
||||
<SystemPromptTab />
|
||||
<CacheStatsCard />
|
||||
<CacheSettingsTab />
|
||||
</div>
|
||||
)}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ const QUOTA_BAR_YELLOW_THRESHOLD = 20;
|
||||
// Provider display config
|
||||
const PROVIDER_CONFIG = {
|
||||
antigravity: { label: "Antigravity", color: "#F59E0B" },
|
||||
"gemini-cli": { label: "Gemini CLI", color: "#4285F4" },
|
||||
github: { label: "GitHub Copilot", color: "#333" },
|
||||
kiro: { label: "Kiro AI", color: "#FF6B35" },
|
||||
codex: { label: "OpenAI Codex", color: "#10A37F" },
|
||||
@@ -279,12 +280,13 @@ export default function ProviderLimits() {
|
||||
const sortedConnections = useMemo(() => {
|
||||
const priority = {
|
||||
antigravity: 1,
|
||||
github: 2,
|
||||
codex: 3,
|
||||
claude: 4,
|
||||
kiro: 5,
|
||||
glm: 6,
|
||||
"kimi-coding": 7,
|
||||
"gemini-cli": 2,
|
||||
github: 3,
|
||||
codex: 4,
|
||||
claude: 5,
|
||||
kiro: 6,
|
||||
glm: 7,
|
||||
"kimi-coding": 8,
|
||||
};
|
||||
return [...filteredConnections].sort(
|
||||
(a, b) => (priority[a.provider] || 9) - (priority[b.provider] || 9)
|
||||
@@ -624,6 +626,7 @@ export default function ProviderLimits() {
|
||||
>
|
||||
{/* Model label */}
|
||||
<span
|
||||
title={q.modelKey || q.name}
|
||||
className="text-[11px] font-semibold py-0.5 px-2 rounded whitespace-nowrap min-w-[60px] text-center"
|
||||
style={{ background: colors.bg, color: colors.text }}
|
||||
>
|
||||
|
||||
@@ -11,15 +11,6 @@ const PROVIDER_PLAN_FALLBACKS = new Set([
|
||||
]);
|
||||
|
||||
const QUOTA_LABEL_MAP: Record<string, string> = {
|
||||
"gemini-3-pro-high": "G3 Pro",
|
||||
"gemini-3-pro-low": "G3 Pro Low",
|
||||
"gemini-3-flash": "G3 Flash",
|
||||
"gemini-2.5-flash": "G2.5 Flash",
|
||||
"claude-opus-4-6-thinking": "Opus 4.6 Tk",
|
||||
"claude-opus-4-5-thinking": "Opus 4.5 Tk",
|
||||
"claude-opus-4-5": "Opus 4.5",
|
||||
"claude-sonnet-4-5-thinking": "Sonnet 4.5 Tk",
|
||||
"claude-sonnet-4-5": "Sonnet 4.5",
|
||||
chat: "Chat",
|
||||
completions: "Completions",
|
||||
premium_interactions: "Premium",
|
||||
@@ -254,6 +245,14 @@ export function parseQuotaData(provider, data) {
|
||||
}
|
||||
break;
|
||||
|
||||
case "gemini-cli":
|
||||
if (data.quotas) {
|
||||
Object.entries(data.quotas).forEach(([modelKey, quota]: [string, any]) => {
|
||||
normalizedQuotas.push(normalizeQuotaEntry(modelKey, quota, { modelKey }));
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
// Generic fallback for unknown providers
|
||||
if (data.quotas) {
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import { getDiversityReport } from "../../../../../open-sse/services/autoCombo/providerDiversity";
|
||||
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
/**
 * GET — returns the provider diversity report as JSON.
 * Responds with HTTP 500 and `{ error }` if building the report throws.
 */
export async function GET() {
  try {
    const report = getDiversityReport();
    return NextResponse.json(report);
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed to carry `.message`.
    const message = error instanceof Error ? error.message : String(error);
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
|
||||
Vendored
+95
@@ -0,0 +1,95 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { getDbInstance } from "@/lib/db/core";
|
||||
|
||||
/** One `semantic_cache` row as selected by the listing query in this route. */
interface CacheEntry {
  id: string;
  /** Entry signature; also the key used for single-entry deletion. */
  signature: string;
  model: string;
  hit_count: number;
  tokens_saved: number;
  created_at: string;
  expires_at: string;
}
|
||||
|
||||
/**
 * GET — paginated listing of `semantic_cache` entries.
 *
 * Query parameters:
 *   page      — 1-based page number (default 1)
 *   limit     — page size, clamped to 1..100 (default 20)
 *   search    — substring matched against signature or model
 *   model     — exact model filter
 *   sortBy    — one of created_at, expires_at, hit_count, tokens_saved, model
 *   sortOrder — "asc" for ascending, anything else sorts descending
 *
 * Responds with `{ entries, pagination }`, or HTTP 500 and `{ error }` on failure.
 */
export async function GET(req: NextRequest) {
  try {
    const { searchParams } = new URL(req.url);

    // parseInt returns NaN for non-numeric input and Math.max/Math.min propagate
    // NaN, which would otherwise be bound to LIMIT/OFFSET. Fall back to defaults.
    const rawPage = parseInt(searchParams.get("page") || "1", 10);
    const rawLimit = parseInt(searchParams.get("limit") || "20", 10);
    const page = Number.isNaN(rawPage) ? 1 : Math.max(1, rawPage);
    const limit = Number.isNaN(rawLimit) ? 20 : Math.min(100, Math.max(1, rawLimit));

    const search = searchParams.get("search") || "";
    const model = searchParams.get("model") || "";
    const sortBy = searchParams.get("sortBy") || "created_at";
    const sortOrder = searchParams.get("sortOrder") || "desc";

    const db = getDbInstance();
    const offset = (page - 1) * limit;

    const conditions: string[] = [];
    const params: unknown[] = [];

    if (search) {
      conditions.push("(signature LIKE ? OR model LIKE ?)");
      params.push(`%${search}%`, `%${search}%`);
    }

    if (model) {
      conditions.push("model = ?");
      params.push(model);
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";

    // Column and direction are interpolated into the SQL text, so both are
    // restricted to a fixed allow-list rather than taken from the request as-is.
    const validSortColumns = ["created_at", "expires_at", "hit_count", "tokens_saved", "model"];
    const orderBy = validSortColumns.includes(sortBy) ? sortBy : "created_at";
    const order = sortOrder === "asc" ? "ASC" : "DESC";

    const countRow = db
      .prepare(`SELECT COUNT(*) as total FROM semantic_cache ${whereClause}`)
      .get(...params) as { total: number };

    const entries = db
      .prepare(
        `SELECT id, signature, model, hit_count, tokens_saved, created_at, expires_at
         FROM semantic_cache ${whereClause}
         ORDER BY ${orderBy} ${order}
         LIMIT ? OFFSET ?`
      )
      .all(...params, limit, offset) as CacheEntry[];

    return NextResponse.json({
      entries,
      pagination: {
        page,
        limit,
        total: countRow?.total || 0,
        totalPages: Math.ceil((countRow?.total || 0) / limit),
      },
    });
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
|
||||
|
||||
/**
 * DELETE — targeted removal of `semantic_cache` entries.
 *
 * Query parameters (signature takes precedence when both are given):
 *   signature — delete the entry with this signature
 *   model     — delete every entry for this model
 *
 * Responds with `{ ok, deleted }` where `deleted` is the number of rows actually
 * removed, 400 when neither parameter is supplied, or 500 and `{ error }` on failure.
 */
export async function DELETE(req: NextRequest) {
  try {
    const { searchParams } = new URL(req.url);
    const signature = searchParams.get("signature");
    const model = searchParams.get("model");

    const db = getDbInstance();

    if (signature) {
      // Report the real row count: an unknown signature deletes nothing.
      const result = db.prepare("DELETE FROM semantic_cache WHERE signature = ?").run(signature);
      return NextResponse.json({ ok: true, deleted: result.changes });
    }

    if (model) {
      const result = db.prepare("DELETE FROM semantic_cache WHERE model = ?").run(model);
      return NextResponse.json({ ok: true, deleted: result.changes });
    }

    return NextResponse.json({ error: "Provide signature or model parameter" }, { status: 400 });
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
|
||||
Vendored
+9
-15
@@ -8,21 +8,26 @@ import {
|
||||
invalidateStale,
|
||||
} from "@/lib/semanticCache";
|
||||
import { getIdempotencyStats } from "@/lib/idempotencyLayer";
|
||||
import { getCacheMetrics, getCacheTrend } from "@/lib/db/settings";
|
||||
|
||||
/**
 * Extracts a printable message from an arbitrary thrown value.
 *
 * @param error - Whatever was caught; not necessarily an `Error`.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
|
||||
* GET /api/cache — Cache statistics
|
||||
*/
|
||||
export async function GET() {
|
||||
export async function GET(req: NextRequest) {
|
||||
try {
|
||||
const { searchParams } = new URL(req.url);
|
||||
const trendHours = parseInt(searchParams.get("trendHours") || "24", 10);
|
||||
|
||||
const cacheStats = getCacheStats();
|
||||
const idempotencyStats = getIdempotencyStats();
|
||||
const promptCacheMetrics = await getCacheMetrics();
|
||||
const trend = await getCacheTrend(trendHours);
|
||||
|
||||
return NextResponse.json({
|
||||
semanticCache: cacheStats,
|
||||
promptCache: promptCacheMetrics,
|
||||
trend,
|
||||
idempotency: idempotencyStats,
|
||||
});
|
||||
} catch (error) {
|
||||
@@ -30,17 +35,6 @@ export async function GET() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* DELETE /api/cache — Clear all caches or targeted invalidation.
|
||||
*
|
||||
* Exactly one optional query parameter may be provided:
|
||||
* ?model=<name> — invalidate all entries for a specific model
|
||||
* ?signature=<hex> — invalidate a single entry by its SHA-256 signature
|
||||
* ?staleMs=<number> — invalidate entries older than N milliseconds
|
||||
* (no params) — clear all cache entries
|
||||
*
|
||||
* Providing more than one parameter returns 400 Bad Request.
|
||||
*/
|
||||
export async function DELETE(req: NextRequest) {
|
||||
try {
|
||||
const { searchParams } = new URL(req.url);
|
||||
|
||||
@@ -139,19 +139,7 @@ const PROVIDER_MODELS_CONFIG: Record<string, ProviderModelsConfigEntry> = {
|
||||
name: m.displayName || (m.name || "").replace(/^models\//, ""),
|
||||
})),
|
||||
},
|
||||
"gemini-cli": {
|
||||
url: "https://generativelanguage.googleapis.com/v1beta/models",
|
||||
method: "GET",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
authHeader: "Authorization",
|
||||
authPrefix: "Bearer ",
|
||||
parseResponse: (data) =>
|
||||
(data.models || []).map((m) => ({
|
||||
...m,
|
||||
id: (m.name || m.id || "").replace(/^models\//, ""),
|
||||
name: m.displayName || (m.name || "").replace(/^models\//, ""),
|
||||
})),
|
||||
},
|
||||
// gemini-cli handled via retrieveUserQuota (see GET handler)
|
||||
qwen: {
|
||||
url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models",
|
||||
method: "GET",
|
||||
@@ -505,6 +493,73 @@ export async function GET(
|
||||
return buildResponse({ provider, connectionId, models });
|
||||
}
|
||||
|
||||
if (provider === "gemini-cli") {
|
||||
// Gemini CLI doesn't have a /models endpoint. Instead, query the quota
|
||||
// endpoint to discover available models from the quota buckets.
|
||||
if (!accessToken) {
|
||||
return NextResponse.json(
|
||||
{ error: "No access token for Gemini CLI. Please reconnect OAuth." },
|
||||
{ status: 400 }
|
||||
);
|
||||
}
|
||||
|
||||
const psd = asRecord(connection.providerSpecificData);
|
||||
const projectId =
|
||||
connection.projectId || psd.projectId || null;
|
||||
|
||||
if (!projectId) {
|
||||
return NextResponse.json(
|
||||
{ error: "Gemini CLI project ID not available. Please reconnect OAuth." },
|
||||
{ status: 400 }
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
const quotaRes = await fetch(
|
||||
"https://cloudcode-pa.googleapis.com/v1internal:retrieveUserQuota",
|
||||
{
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({ project: projectId }),
|
||||
signal: AbortSignal.timeout(10000),
|
||||
}
|
||||
);
|
||||
|
||||
if (!quotaRes.ok) {
|
||||
const errText = await quotaRes.text();
|
||||
console.log(`[models] Gemini CLI quota fetch failed (${quotaRes.status}):`, errText);
|
||||
return NextResponse.json(
|
||||
{ error: `Failed to fetch Gemini CLI models: ${quotaRes.status}` },
|
||||
{ status: quotaRes.status }
|
||||
);
|
||||
}
|
||||
|
||||
const quotaData = await quotaRes.json();
|
||||
const buckets: Array<{ modelId?: string; tokenType?: string }> =
|
||||
quotaData.buckets || [];
|
||||
|
||||
const models = buckets
|
||||
.filter((b) => b.modelId)
|
||||
.map((b) => ({
|
||||
id: b.modelId,
|
||||
name: b.modelId,
|
||||
owned_by: "google",
|
||||
}));
|
||||
|
||||
return buildResponse({ provider, connectionId, models });
|
||||
} catch (err: unknown) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
console.log("[models] Gemini CLI model fetch error:", msg);
|
||||
return NextResponse.json(
|
||||
{ error: "Failed to fetch Gemini CLI models" },
|
||||
{ status: 500 }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (isAnthropicCompatibleProvider(provider)) {
|
||||
let baseUrl = getProviderBaseUrl(connection.providerSpecificData);
|
||||
if (!baseUrl) {
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { getSettings, updateSettings } from "@/lib/localDb";
|
||||
import { isAuthenticated } from "@/shared/utils/apiAuth";
|
||||
|
||||
/**
 * Settings keys that make up the cache configuration returned by the GET
 * handler in this file. Declared `as const` so each key keeps its literal type.
 */
const CACHE_CONFIG_KEYS = [
  // Semantic cache
  "semanticCacheEnabled",
  "semanticCacheMaxSize",
  "semanticCacheTTL",
  // Prompt cache
  "promptCacheEnabled",
  "promptCacheStrategy",
  "alwaysPreserveClientCache",
] as const;
|
||||
|
||||
/**
 * Fallback values used by the GET handler for any cache setting that is
 * missing (null/undefined) in the stored settings.
 */
const DEFAULTS = {
  semanticCacheEnabled: true,
  semanticCacheMaxSize: 100,
  // 1_800_000 — presumably milliseconds (30 minutes); confirm against the consumer.
  semanticCacheTTL: 30 * 60 * 1000,
  promptCacheEnabled: true,
  promptCacheStrategy: "auto",
  alwaysPreserveClientCache: "auto",
};
|
||||
|
||||
/**
 * Returns the current cache configuration as a flat JSON object.
 *
 * Each key in `CACHE_CONFIG_KEYS` is read from the stored settings; a
 * null/undefined stored value is replaced by the matching `DEFAULTS` entry.
 *
 * Responds 401 when the request is not authenticated and 500 (with the
 * stringified error) when reading the settings throws.
 */
export async function GET(request: NextRequest) {
  const authorized = await isAuthenticated(request);
  if (!authorized) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  try {
    const stored = await getSettings();
    const pairs = CACHE_CONFIG_KEYS.map(
      (key): [string, unknown] => [key, stored[key] ?? DEFAULTS[key]]
    );
    return NextResponse.json(Object.fromEntries(pairs));
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
|
||||
|
||||
/**
 * Updates the cache configuration from a JSON request body.
 *
 * Only recognised fields with valid values are persisted; anything else in
 * the body is ignored:
 *   - `semanticCacheEnabled`, `promptCacheEnabled` — booleans.
 *   - `semanticCacheMaxSize` — positive integer.
 *   - `semanticCacheTTL` — positive finite number.
 *   - `promptCacheStrategy` — "auto" | "system-only" | "manual".
 *   - `alwaysPreserveClientCache` — "auto" | "always" | "never".
 *
 * Responses:
 *   - 200 `{ ok: true }` after `updateSettings` resolves.
 *   - 400 when the body is not valid JSON or is not a JSON object.
 *   - 401 when the request is not authenticated.
 *   - 500 with the stringified error when persisting fails.
 */
export async function PUT(request: NextRequest) {
  if (!(await isAuthenticated(request))) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  // A malformed or non-object body is a client error, not a server failure.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
  }
  if (typeof body !== "object" || body === null) {
    return NextResponse.json({ error: "Request body must be a JSON object" }, { status: 400 });
  }
  const input = body as Record<string, unknown>;

  try {
    const updates: Record<string, unknown> = {};

    if (typeof input.semanticCacheEnabled === "boolean") {
      updates.semanticCacheEnabled = input.semanticCacheEnabled;
    }
    // Entry counts must be whole numbers; this also rejects Infinity (e.g. `1e999`).
    if (
      typeof input.semanticCacheMaxSize === "number" &&
      Number.isInteger(input.semanticCacheMaxSize) &&
      input.semanticCacheMaxSize > 0
    ) {
      updates.semanticCacheMaxSize = input.semanticCacheMaxSize;
    }
    if (
      typeof input.semanticCacheTTL === "number" &&
      Number.isFinite(input.semanticCacheTTL) &&
      input.semanticCacheTTL > 0
    ) {
      updates.semanticCacheTTL = input.semanticCacheTTL;
    }
    if (typeof input.promptCacheEnabled === "boolean") {
      updates.promptCacheEnabled = input.promptCacheEnabled;
    }
    if (
      typeof input.promptCacheStrategy === "string" &&
      ["auto", "system-only", "manual"].includes(input.promptCacheStrategy)
    ) {
      updates.promptCacheStrategy = input.promptCacheStrategy;
    }
    if (
      typeof input.alwaysPreserveClientCache === "string" &&
      ["auto", "always", "never"].includes(input.alwaysPreserveClientCache)
    ) {
      updates.alwaysPreserveClientCache = input.alwaysPreserveClientCache;
    }

    await updateSettings(updates);
    return NextResponse.json({ ok: true });
  } catch (error) {
    return NextResponse.json({ error: String(error) }, { status: 500 });
  }
}
|
||||
@@ -1712,6 +1712,17 @@
|
||||
"cacheMisses": "Cache Misses",
|
||||
"hitRate": "Hit Rate",
|
||||
"cacheEntries": "Cache Entries",
|
||||
"cacheSettings": "Cache Settings",
|
||||
"semanticCache": "Semantic Cache",
|
||||
"maxEntries": "Max Entries",
|
||||
"ttlMinutes": "TTL (minutes)",
|
||||
"promptCache": "Prompt Cache",
|
||||
"strategy": "Strategy",
|
||||
"preserveClientCache": "Preserve Client Cache",
|
||||
"enabled": "Enabled",
|
||||
"loading": "Loading...",
|
||||
"saving": "Saving...",
|
||||
"save": "Save",
|
||||
"circuitBreaker": "Circuit Breaker",
|
||||
"retryPolicy": "Retry Policy",
|
||||
"maxRetries": "Max Retries",
|
||||
@@ -2920,6 +2931,30 @@
|
||||
"clearSuccess": "Cache cleared. {count} expired entries removed.",
|
||||
"clearError": "Failed to clear cache.",
|
||||
"unavailable": "Cache unavailable",
|
||||
"unavailableDesc": "Could not fetch cache statistics. Make sure the server is running."
|
||||
"unavailableDesc": "Could not fetch cache statistics. Make sure the server is running.",
|
||||
"promptCache": "Prompt Cache (Provider-Side)",
|
||||
"cachedRequests": "Cached Requests",
|
||||
"cacheHitRate": "Cache Hit Rate",
|
||||
"cachedTokens": "Cached Tokens",
|
||||
"cacheCreationTokens": "Cache Creation Tokens",
|
||||
"byProvider": "Breakdown by Provider",
|
||||
"provider": "Provider",
|
||||
"requests": "Requests",
|
||||
"inputTokens": "Input Tokens",
|
||||
"cachedTokensCol": "Cached",
|
||||
"cacheCreation": "Creation",
|
||||
"trend24h": "Cache Trend (24h)",
|
||||
"cached": "Cached",
|
||||
"overview": "Overview",
|
||||
"entries": "Entries",
|
||||
"searchEntries": "Search entries...",
|
||||
"search": "Search",
|
||||
"loading": "Loading...",
|
||||
"noEntries": "No cache entries found",
|
||||
"signature": "Signature",
|
||||
"model": "Model",
|
||||
"created": "Created",
|
||||
"expires": "Expires",
|
||||
"actions": "Actions"
|
||||
}
|
||||
}
|
||||
|
||||
+58
-1
@@ -577,9 +577,14 @@ export async function getCacheMetrics() {
|
||||
cacheCreationTokens: number | null;
|
||||
}>;
|
||||
|
||||
// Calculate tokens saved (cached tokens are reused, not charged at full price)
|
||||
const tokensSaved = totalsRow?.totalCachedTokens || 0;
|
||||
|
||||
const AVG_INPUT_PRICE_PER_MILLION = 3;
|
||||
const CACHE_DISCOUNT = 0.9;
|
||||
const estimatedCostSaved =
|
||||
Math.round((tokensSaved / 1_000_000) * AVG_INPUT_PRICE_PER_MILLION * CACHE_DISCOUNT * 100) /
|
||||
100;
|
||||
|
||||
// Build byProvider object
|
||||
const byProvider: Record<
|
||||
string,
|
||||
@@ -653,6 +658,58 @@ export async function updateCacheMetrics(_metrics: Record<string, unknown>) {
|
||||
return getCacheMetrics();
|
||||
}
|
||||
|
||||
/**
 * One hourly bucket of prompt-cache usage, as produced by `getCacheTrend`.
 */
export interface CacheTrendPoint {
  /** Start of the hour bucket, formatted as `YYYY-MM-DDTHH:00:00Z`. */
  timestamp: string;
  /** Number of `usage_history` rows in the bucket. */
  requests: number;
  /** Rows in the bucket with cache-read or cache-creation tokens above zero. */
  cachedRequests: number;
  /** Sum of `tokens_input` over the bucket (0 when the sum is null). */
  inputTokens: number;
  /** Sum of `tokens_cache_read` over the bucket (0 when the sum is null). */
  cachedTokens: number;
  /** Sum of `tokens_cache_creation` over the bucket (0 when the sum is null). */
  cacheCreationTokens: number;
}
|
||||
|
||||
/**
 * Aggregates `usage_history` into hourly prompt-cache buckets.
 *
 * @param hours Size of the look-back window in hours. Values that are not a
 *   positive finite number (NaN, 0, negatives, Infinity) fall back to 24;
 *   otherwise they would build a modifier such as "-NaN hours" that SQLite's
 *   `datetime()` cannot interpret, which silently produced an empty trend.
 * @returns One point per hour that has at least one row, oldest first.
 *   Returns an empty array (and logs the error) when the query fails.
 */
export async function getCacheTrend(hours = 24): Promise<CacheTrendPoint[]> {
  const windowHours = Number.isFinite(hours) && hours > 0 ? hours : 24;
  const db = getDbInstance();

  try {
    const rows = db
      .prepare(
        `
        SELECT
          strftime('%Y-%m-%dT%H:00:00Z', timestamp) as hour,
          COUNT(*) as requests,
          SUM(CASE WHEN tokens_cache_read > 0 OR tokens_cache_creation > 0 THEN 1 ELSE 0 END) as cachedRequests,
          SUM(tokens_input) as inputTokens,
          SUM(tokens_cache_read) as cachedTokens,
          SUM(tokens_cache_creation) as cacheCreationTokens
        FROM usage_history
        WHERE timestamp >= datetime('now', ?)
        GROUP BY hour
        ORDER BY hour ASC
      `
      )
      .all(`-${windowHours} hours`) as Array<{
      hour: string;
      requests: number;
      cachedRequests: number;
      inputTokens: number | null;
      cachedTokens: number | null;
      cacheCreationTokens: number | null;
    }>;

    // SUM() yields NULL when every summed value is NULL; surface that as 0.
    return rows.map((r) => ({
      timestamp: r.hour,
      requests: r.requests,
      cachedRequests: r.cachedRequests,
      inputTokens: r.inputTokens ?? 0,
      cachedTokens: r.cachedTokens ?? 0,
      cacheCreationTokens: r.cacheCreationTokens ?? 0,
    }));
  } catch (error) {
    console.error("Failed to fetch cache trend:", error);
    return [];
  }
}
|
||||
|
||||
export async function resetCacheMetrics() {
|
||||
// No-op: cannot delete historical usage data
|
||||
// Cache metrics are computed from usage_history, so they reflect actual request history
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
export { analyzePrefix, shouldInjectCacheControl } from "./prefixAnalyzer";
|
||||
@@ -0,0 +1,77 @@
|
||||
import crypto from "crypto";
|
||||
|
||||
/** Minimal chat-message shape consumed by the prefix analysis in this file. */
interface Message {
  /** Sender role; the analysis recognises "system", "tool" and "assistant". */
  role: string;
  /** Plain text, or an array of content parts (serialised as JSON when hashed). */
  content: string | unknown[];
}
|
||||
|
||||
/** Result of scanning the leading messages of a conversation for a reusable prefix. */
interface PrefixAnalysis {
  /** Index of the last message that belongs to the prefix; -1 when there is none. */
  prefixEndIdx: number;
  /** Hex SHA-256 digest computed over the prefix text. */
  prefixHash: string;
  /** Rough token estimate for the prefix text. */
  prefixTokens: number;
  /** Classification assigned by the last prefix message that was scanned. */
  prefixType: "system_only" | "system_and_tools" | "system_tools_history";
  /** Heuristic score attached to the classification. */
  confidence: number;
}
|
||||
|
||||
/**
 * Flattens message content to a string: text is returned unchanged, while
 * structured (array) content is serialised with `JSON.stringify`.
 */
function normalizeContent(content: string | unknown[]): string {
  return typeof content === "string" ? content : JSON.stringify(content);
}
|
||||
|
||||
/**
 * Approximates a token count as one token per four characters, rounded up.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  return Math.ceil(text.length / charsPerToken);
}
|
||||
|
||||
/**
 * Finds the leading run of "prefix" messages (system, tool, assistant) in a
 * conversation and summarises it.
 *
 * Scanning starts at index 0 and stops at the first message whose role is
 * anything else (a missing role counts as "user"). Each prefix message
 * overwrites the classification, so `prefixType` and `confidence` reflect the
 * last prefix message seen:
 *   - "system"                                   → "system_only", 0.9
 *   - "tool", or "assistant" with array content  → "system_and_tools", 0.8
 *   - "assistant" with text content              → "system_tools_history", 0.7
 *
 * @param messages Conversation messages in order.
 * @returns The prefix summary. When `messages` is empty / not an array, or the
 *   very first message is not a prefix role, a uniform "no prefix" result is
 *   returned: `prefixEndIdx: -1`, empty hash, zero tokens, zero confidence.
 */
export function analyzePrefix(messages: Message[]): PrefixAnalysis {
  const noPrefix: PrefixAnalysis = {
    prefixEndIdx: -1,
    prefixHash: "",
    prefixTokens: 0,
    prefixType: "system_only",
    confidence: 0,
  };

  if (!Array.isArray(messages) || messages.length === 0) {
    return noPrefix;
  }

  let prefixEndIdx = -1;
  let prefixType: PrefixAnalysis["prefixType"] = "system_only";
  let confidence = 0;

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    // A null/undefined entry is treated like a user turn and ends the prefix.
    const role = msg?.role || "user";

    if (role === "system") {
      prefixEndIdx = i;
      prefixType = "system_only";
      confidence = 0.9;
    } else if (role === "tool" || (role === "assistant" && Array.isArray(msg.content))) {
      prefixEndIdx = i;
      prefixType = "system_and_tools";
      confidence = 0.8;
    } else if (role === "assistant") {
      prefixEndIdx = i;
      prefixType = "system_tools_history";
      confidence = 0.7;
    } else {
      break;
    }
  }

  // Nothing matched: report the same result as for empty input instead of a
  // 0.5 confidence and the SHA-256 of an empty string.
  if (prefixEndIdx === -1) {
    return noPrefix;
  }

  const prefixMessages = messages.slice(0, prefixEndIdx + 1);
  const prefixText = prefixMessages.map((m) => normalizeContent(m.content)).join("\n");
  const prefixHash = crypto.createHash("sha256").update(prefixText).digest("hex");
  const prefixTokens = estimateTokens(prefixText);

  return {
    prefixEndIdx,
    prefixHash,
    prefixTokens,
    prefixType,
    confidence,
  };
}
|
||||
|
||||
/**
 * Decides whether a prefix is worth marking for caching.
 *
 * @param analysis Result of the prefix analysis.
 * @param minTokens Minimum estimated prefix size, in tokens (default 1024).
 * @returns `true` only when the prefix has at least `minTokens` estimated
 *   tokens and its confidence is at least 0.7.
 */
export function shouldInjectCacheControl(analysis: PrefixAnalysis, minTokens = 1024): boolean {
  const longEnough = analysis.prefixTokens >= minTokens;
  const confident = analysis.confidence >= 0.7;
  return longEnough && confident;
}
|
||||
+67
-18
@@ -29,6 +29,45 @@ function toNumber(value: unknown, fallback = 0): number {
|
||||
return fallback;
|
||||
}
|
||||
|
||||
/**
 * Creates the `cache_metrics` key/value table if it does not exist and seeds
 * the `hits`, `misses` and `tokens_saved` rows with 0 (existing rows are left
 * untouched by `INSERT OR IGNORE`). Any database error is swallowed so that
 * callers keep working when the DB is unavailable.
 */
function ensureCacheMetricsTable() {
  const createTableSql = `CREATE TABLE IF NOT EXISTS cache_metrics (
        key TEXT PRIMARY KEY,
        value INTEGER NOT NULL DEFAULT 0,
        updated_at TEXT NOT NULL DEFAULT (datetime('now'))
      )`;
  const seedRowsSql = `INSERT OR IGNORE INTO cache_metrics (key, value) VALUES ('hits', 0), ('misses', 0), ('tokens_saved', 0)`;

  try {
    const database = getDbInstance();
    database.prepare(createTableSql).run();
    database.prepare(seedRowsSql).run();
  } catch {
    // DB not available
  }
}
|
||||
|
||||
/**
 * Adds `amount` to a persisted counter in the `cache_metrics` table and
 * refreshes its `updated_at` timestamp.
 *
 * @param metric Counter row to update.
 * @param amount Value to add (default 1). A zero or non-finite amount is a
 *   no-op, which avoids a pointless write on every cache hit that saved no
 *   tokens.
 *
 * Best-effort: database errors are swallowed and the increment is lost;
 * there is no in-memory fallback.
 */
function incrementMetric(metric: "hits" | "misses" | "tokens_saved", amount = 1) {
  if (!Number.isFinite(amount) || amount === 0) {
    return;
  }

  try {
    const db = getDbInstance();
    db.prepare(
      `UPDATE cache_metrics SET value = value + ?, updated_at = datetime('now') WHERE key = ?`
    ).run(amount, metric);
  } catch {
    // DB not available — the increment is dropped.
  }
}
|
||||
|
||||
/**
 * Reads one counter from the `cache_metrics` table.
 *
 * @param metric Value of the `key` column to look up.
 * @returns The stored value converted with `toNumber`, or 0 when the row is
 *   missing or the database call throws.
 */
function getMetricValue(metric: string): number {
  try {
    const found = getDbInstance()
      .prepare(`SELECT value FROM cache_metrics WHERE key = ?`)
      .get(metric);
    if (!found) {
      return 0;
    }
    return toNumber(asRecord(found).value, 0);
  } catch {
    return 0;
  }
}
|
||||
|
||||
function getHeaderValue(
|
||||
headers: { get?: (name: string) => string | null } | Record<string, unknown> | null | undefined,
|
||||
name: string
|
||||
@@ -51,7 +90,6 @@ function getHeaderValue(
|
||||
// ─── Singleton ─────────────────
|
||||
|
||||
let memoryCache: LRUCache | null = null;
|
||||
let stats = { hits: 0, misses: 0, tokensSaved: 0 };
|
||||
|
||||
function getMemoryCache() {
|
||||
if (!memoryCache) {
|
||||
@@ -60,6 +98,7 @@ function getMemoryCache() {
|
||||
maxBytes: parseInt(process.env.SEMANTIC_CACHE_MAX_BYTES || String(4 * 1024 * 1024), 10),
|
||||
defaultTTL: parseInt(process.env.SEMANTIC_CACHE_TTL_MS || "1800000", 10),
|
||||
});
|
||||
ensureCacheMetricsTable();
|
||||
}
|
||||
return memoryCache;
|
||||
}
|
||||
@@ -108,8 +147,8 @@ export function getCachedResponse(signature) {
|
||||
// 1. Check memory cache
|
||||
const memResult = getMemoryCache().get(signature);
|
||||
if (memResult) {
|
||||
stats.hits++;
|
||||
stats.tokensSaved += memResult.tokensSaved || 0;
|
||||
incrementMetric("hits");
|
||||
incrementMetric("tokens_saved", memResult.tokensSaved || 0);
|
||||
return memResult.response;
|
||||
}
|
||||
|
||||
@@ -126,7 +165,7 @@ export function getCachedResponse(signature) {
|
||||
const record = asRecord(row);
|
||||
const responsePayload = typeof record.response === "string" ? record.response : null;
|
||||
if (!responsePayload) {
|
||||
stats.misses++;
|
||||
incrementMetric("misses");
|
||||
return null;
|
||||
}
|
||||
const parsed = JSON.parse(responsePayload);
|
||||
@@ -141,15 +180,15 @@ export function getCachedResponse(signature) {
|
||||
signature
|
||||
);
|
||||
|
||||
stats.hits++;
|
||||
stats.tokensSaved += tokensSaved;
|
||||
incrementMetric("hits");
|
||||
incrementMetric("tokens_saved", tokensSaved);
|
||||
return parsed;
|
||||
}
|
||||
} catch {
|
||||
// DB not available — fail open
|
||||
}
|
||||
|
||||
stats.misses++;
|
||||
incrementMetric("misses");
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -280,6 +319,17 @@ export function stopAutoCleanup(): void {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Deletes `semantic_cache` rows whose `created_at` is older than the
 * retention window. Note that, despite the name, the statement targets the
 * `semantic_cache` table, not `cache_metrics`.
 *
 * @param retentionDays Age limit in days (default 90).
 * @returns Number of rows removed, or 0 when the database call throws.
 */
export function cleanOldMetrics(retentionDays = 90): number {
  const msPerDay = 86400000;

  try {
    const db = getDbInstance();
    const cutoffIso = new Date(Date.now() - retentionDays * msPerDay).toISOString();
    const outcome = db.prepare("DELETE FROM semantic_cache WHERE created_at < ?").run(cutoffIso);
    return outcome.changes || 0;
  } catch {
    return 0;
  }
}
|
||||
|
||||
/**
|
||||
* Clear all cache entries.
|
||||
*/
|
||||
@@ -288,17 +338,12 @@ export function clearCache() {
|
||||
try {
|
||||
const db = getDbInstance();
|
||||
db.prepare("DELETE FROM semantic_cache").run();
|
||||
db.prepare("UPDATE cache_metrics SET value = 0").run();
|
||||
} catch {
|
||||
// DB not available
|
||||
}
|
||||
stats = { hits: 0, misses: 0, tokensSaved: 0 };
|
||||
}
|
||||
|
||||
// ─── Stats ─────────────────
|
||||
|
||||
/**
|
||||
* Get cache statistics.
|
||||
*/
|
||||
export function getCacheStats() {
|
||||
const memStats = getMemoryCache().getStats();
|
||||
let dbSize = 0;
|
||||
@@ -312,14 +357,18 @@ export function getCacheStats() {
|
||||
// DB not available
|
||||
}
|
||||
|
||||
const total = stats.hits + stats.misses;
|
||||
const hits = getMetricValue("hits");
|
||||
const misses = getMetricValue("misses");
|
||||
const tokensSaved = getMetricValue("tokens_saved");
|
||||
const total = hits + misses;
|
||||
|
||||
return {
|
||||
memoryEntries: memStats.size,
|
||||
dbEntries: dbSize,
|
||||
hits: stats.hits,
|
||||
misses: stats.misses,
|
||||
hitRate: total > 0 ? ((stats.hits / total) * 100).toFixed(1) : "0.0",
|
||||
tokensSaved: stats.tokensSaved,
|
||||
hits,
|
||||
misses,
|
||||
hitRate: total > 0 ? ((hits / total) * 100).toFixed(1) : "0.0",
|
||||
tokensSaved,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -656,6 +656,7 @@ export const ID_TO_ALIAS = Object.values(AI_PROVIDERS).reduce((acc, p) => {
|
||||
// Providers that support usage/quota API
|
||||
export const USAGE_SUPPORTED_PROVIDERS = [
|
||||
"antigravity",
|
||||
"gemini-cli",
|
||||
"kiro",
|
||||
"github",
|
||||
"codex",
|
||||
|
||||
Reference in New Issue
Block a user