Compare commits
70 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d3dfd9ce57 | |||
| aa06d5d356 | |||
| 448c8a29e1 | |||
| 928b7120f4 | |||
| a3deacd718 | |||
| 78959fffbd | |||
| 1788616e52 | |||
| c61e6d0777 | |||
| a3bc7620b1 | |||
| 8064c588dc | |||
| 564e983c68 | |||
| e1da181740 | |||
| c63209200e | |||
| 737808cf53 | |||
| a197bb7736 | |||
| f9dd967bc5 | |||
| 44e4d55a66 | |||
| 095c84ac16 | |||
| e063eae727 | |||
| f02c5b5c69 | |||
| 838f1d645c | |||
| ce2c30c437 | |||
| d56fae0a7b | |||
| e45ef00bef | |||
| e9f31f7394 | |||
| 7c10a98eb2 | |||
| f260483101 | |||
| 389e6e5c9e | |||
| 1cfd5866be | |||
| c7ceac7f41 | |||
| cd6eca0424 | |||
| 8c6136fea0 | |||
| 9644444028 | |||
| 9c4154291d | |||
| 533f5f6da6 | |||
| 1b8de756cd | |||
| 650b415537 | |||
| 04b50329fc | |||
| 25aab8c55c | |||
| ceda2e70c1 | |||
| 2908303d4b | |||
| a9f69711c6 | |||
| a8ab16a720 | |||
| 8091b6b508 | |||
| a00ef0fc7e | |||
| 5ce6d615a4 | |||
| e06b69cdac | |||
| d261ae7883 | |||
| 6fa77a63d7 | |||
| f76c1b32d6 | |||
| 0aede2ef63 | |||
| 1e3a2e0a27 | |||
| 1bdabf43db | |||
| 05e568feb0 | |||
| 81e2519436 | |||
| ef623c9bb5 | |||
| da581525a6 | |||
| 6ff7b6570c | |||
| 8b2081837e | |||
| ce978b602a | |||
| 9b00f5d550 | |||
| d98ec59c79 | |||
| d79b55be5a | |||
| 1f9a402dcd | |||
| f9bcc9418b | |||
| 08256a3502 | |||
| 9b255e643a | |||
| ca1f918e9e | |||
| bb3fe1cd48 | |||
| d139b4557f |
@@ -4,73 +4,81 @@ description: Deploy the latest OmniRoute code to the Akamai VPS (69.164.221.35)
|
||||
|
||||
# Deploy to VPS Workflow
|
||||
|
||||
Deploy OmniRoute to the production VPS using `npm install -g` + PM2.
|
||||
Deploy OmniRoute to the production VPS using `npm pack + scp` + PM2.
|
||||
|
||||
**VPS:** `69.164.221.35` (Akamai, Ubuntu 24.04, 1GB RAM + 2.5GB swap)
|
||||
**Local VPS:** `192.168.0.15` (same setup)
|
||||
**Process manager:** PM2 (`omniroute`)
|
||||
**Port:** `20128`
|
||||
**PM2 entry:** `/usr/lib/node_modules/omniroute/app/server.js`
|
||||
|
||||
> [!IMPORTANT]
|
||||
> PM2 runs from the global npm package at `/usr/lib/node_modules/omniroute`.
|
||||
> **DO NOT** use git clone or local copies. The `npm install -g` command handles
|
||||
> building, publishing, and installing the standalone app in one step.
|
||||
> The Next.js standalone build is at `app/server.js` inside that directory.
|
||||
> The npm registry rejects packages > 100MB, so deployment uses **npm pack + scp**.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Publish to npm
|
||||
### 1. Build + pack locally
|
||||
|
||||
Ensure the version in `package.json` is bumped and the package is published:
|
||||
Run the full build (includes hash-strip patch) and create the .tgz:
|
||||
|
||||
// turbo
|
||||
|
||||
```bash
|
||||
npm publish
|
||||
cd /home/diegosouzapw/dev/proxys/9router && npm run build:cli && npm pack --ignore-scripts
|
||||
```
|
||||
|
||||
### 2. Install on VPS and restart PM2
|
||||
### 2. Copy to both VPS and install
|
||||
|
||||
// turbo-all
|
||||
|
||||
```bash
|
||||
ssh root@69.164.221.35 "npm install -g omniroute@latest && pm2 restart omniroute && pm2 save && echo '✅ Deploy complete!'"
|
||||
scp omniroute-*.tgz root@69.164.221.35:/tmp/ && scp omniroute-*.tgz root@192.168.0.15:/tmp/
|
||||
```
|
||||
|
||||
For the local VPS:
|
||||
```bash
|
||||
ssh root@69.164.221.35 "npm install -g /tmp/omniroute-*.tgz --ignore-scripts && pm2 restart omniroute && pm2 save && echo '✅ Akamai done'"
|
||||
```
|
||||
|
||||
```bash
|
||||
ssh root@192.168.0.15 "npm install -g omniroute@latest && pm2 restart omniroute && pm2 save && echo '✅ Deploy complete!'"
|
||||
ssh root@192.168.0.15 "npm install -g /tmp/omniroute-*.tgz --ignore-scripts && pm2 restart omniroute && pm2 save && echo '✅ Local done'"
|
||||
```
|
||||
|
||||
### 3. Verify the deployment
|
||||
|
||||
```bash
|
||||
ssh root@69.164.221.35 "pm2 list && cat \$(npm root -g)/omniroute/package.json | grep version | head -1 && curl -s -o /dev/null -w 'HTTP %{http_code}' http://localhost:20128/"
|
||||
ssh root@69.164.221.35 "pm2 list && cat \$(npm root -g)/omniroute/app/package.json | grep version | head -1 && curl -s -o /dev/null -w 'HTTP %{http_code}' http://localhost:20128/"
|
||||
```
|
||||
|
||||
Expected: PM2 shows `online`, version matches published, HTTP returns `307` (redirect to login).
|
||||
Expected: PM2 shows `online`, version matches, HTTP returns `307`.
|
||||
|
||||
## How it works
|
||||
|
||||
1. `npm publish` builds Next.js standalone + bundles everything into the npm package
|
||||
2. `npm install -g omniroute@latest` downloads and installs to `/usr/lib/node_modules/omniroute/`
|
||||
3. PM2 is registered to run `npm start` from that directory (cwd: `/usr/lib/node_modules/omniroute`)
|
||||
4. `pm2 restart omniroute` picks up the new code immediately
|
||||
1. `npm run build:cli` builds Next.js standalone → `app/` and strips Turbopack hashed require() calls from chunks
|
||||
2. `npm pack --ignore-scripts` packages without re-running the build
|
||||
3. `scp` transfers the .tgz to each VPS (~286MB)
|
||||
4. `npm install -g /tmp/omniroute-*.tgz --ignore-scripts` installs pre-built package
|
||||
5. PM2 runs `app/server.js` from `/usr/lib/node_modules/omniroute`
|
||||
|
||||
## PM2 Setup (one-time)
|
||||
|
||||
If PM2 needs to be reconfigured from scratch:
|
||||
## PM2 Setup (one-time — if reconfiguring from scratch)
|
||||
|
||||
```bash
|
||||
ssh root@<VPS> "
|
||||
cd /usr/lib/node_modules/omniroute &&
|
||||
PORT=20128 pm2 start app/server.js --name omniroute --env PORT=20128 &&
|
||||
pm2 save &&
|
||||
pm2 startup
|
||||
pm2 delete omniroute ;
|
||||
cp /opt/omniroute-app/.env /usr/lib/node_modules/omniroute/app/.env &&
|
||||
PORT=20128 pm2 start /usr/lib/node_modules/omniroute/app/server.js --name omniroute --cwd /usr/lib/node_modules/omniroute/app &&
|
||||
pm2 save && pm2 startup
|
||||
"
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> Copy `.env` from the old installation first. For Akamai it was at `/opt/omniroute-app/.env`,
|
||||
> for the local VPS it was at `/root/omniroute-fresh/.env`.
|
||||
|
||||
## Notes
|
||||
|
||||
- The `.env` file is at `/usr/lib/node_modules/omniroute/.env`. Back it up before major npm updates.
|
||||
- PM2 is configured with `pm2 startup` to auto-restart on reboot.
|
||||
- Nginx proxies `omniroute.online` → `localhost:20128`.
|
||||
- The VPS has only 1GB RAM — builds happen locally via `npm publish`, not on the VPS.
|
||||
- `.env` should be placed at `/usr/lib/node_modules/omniroute/app/.env`
|
||||
- PM2 is configured with `pm2 startup` to auto-restart on reboot
|
||||
- Nginx proxies `omniroute.online` → `localhost:20128`
|
||||
- The VPS has only 1GB RAM — builds happen locally, never on the VPS
|
||||
|
||||
@@ -32,6 +32,27 @@ Version format: `2.x.y` — examples:
|
||||
npm version patch --no-git-tag-version
|
||||
```
|
||||
|
||||
> **⚠️ ATOMIC COMMIT RULE — Version bump MUST happen before committing feature files.**
|
||||
>
|
||||
> **CORRECT order:**
|
||||
>
|
||||
> 1. `npm version patch --no-git-tag-version` ← bump first
|
||||
> 2. implement features / fix bugs
|
||||
> 3. `git add -A && git commit -m "chore(release): v2.x.y — all changes in ONE commit"`
|
||||
>
|
||||
> **OR, if features are already implemented but not yet committed:**
|
||||
>
|
||||
> 1. implement features (do NOT commit yet)
|
||||
> 2. `npm version patch --no-git-tag-version` ← bump before committing
|
||||
> 3. `git add -A && git commit -m "chore(release): v2.x.y — all changes in ONE commit"`
|
||||
>
|
||||
> **NEVER do this (creates version mismatch in git history):**
|
||||
>
|
||||
> - ~~commit features → then bump version → commit package.json separately~~
|
||||
>
|
||||
> This ensures that `git show v2.x.y` always contains both code changes and the version bump together.
|
||||
> The GitHub release tag will point to a commit that includes ALL changes for that version.
|
||||
|
||||
### 2. Regenerate lock file (REQUIRED after version bump)
|
||||
|
||||
**Mandatory** — skipping causes `@swc/helpers` lock mismatch and CI failures:
|
||||
@@ -85,12 +106,49 @@ git push origin main --tags
|
||||
gh release create v2.x.y --title "v2.x.y — summary" --notes "..."
|
||||
```
|
||||
|
||||
### 8. Deploy to VPS (if requested)
|
||||
### 8. 🐳 Trigger Docker Hub build (MANDATORY — keep npm and Docker in sync)
|
||||
|
||||
See `/deploy-vps` workflow for Akamai VPS or use npm for local VPS:
|
||||
> **CRITICAL**: Docker Hub and npm MUST always publish the same version.
|
||||
> The Docker image is built automatically via GitHub Actions when a new tag is pushed.
|
||||
> After pushing the tag in steps 5–6, **verify the workflow runs**:
|
||||
|
||||
```bash
|
||||
ssh root@<VPS_IP> "npm install -g omniroute@2.x.y && pm2 restart omniroute"
|
||||
# Verify the Docker workflow triggered
|
||||
gh run list --repo diegosouzapw/OmniRoute --workflow docker-publish.yml --limit 3
|
||||
|
||||
# Wait for the Docker build to complete (usually 5–10 min)
|
||||
gh run watch --repo diegosouzapw/OmniRoute
|
||||
|
||||
# After completion, verify on Docker Hub:
|
||||
# https://hub.docker.com/r/diegosouzapw/omniroute/tags
|
||||
```
|
||||
|
||||
If the Docker build was not triggered automatically, trigger it manually:
|
||||
|
||||
```bash
|
||||
gh workflow run docker-publish.yml --repo diegosouzapw/OmniRoute --ref v2.x.y
|
||||
```
|
||||
|
||||
### 9. Deploy to BOTH VPS environments (MANDATORY)
|
||||
|
||||
> Always deploy to **both** environments after every release.
|
||||
> See `/deploy-vps` workflow for detailed steps.
|
||||
|
||||
```bash
|
||||
# Build and pack locally
|
||||
cd /home/diegosouzapw/dev/proxys/9router && npm run build:cli && npm pack --ignore-scripts
|
||||
|
||||
# Deploy to LOCAL VPS (192.168.0.15)
|
||||
scp omniroute-*.tgz root@192.168.0.15:/tmp/
|
||||
ssh root@192.168.0.15 "npm install -g /tmp/omniroute-*.tgz --ignore-scripts && pm2 restart omniroute && pm2 save"
|
||||
|
||||
# Deploy to AKAMAI VPS (69.164.221.35)
|
||||
scp omniroute-*.tgz root@69.164.221.35:/tmp/
|
||||
ssh root@69.164.221.35 "npm install -g /tmp/omniroute-*.tgz --ignore-scripts && pm2 restart omniroute && pm2 save"
|
||||
|
||||
# Verify both
|
||||
curl -s -o /dev/null -w "LOCAL: HTTP %{http_code}\n" http://192.168.0.15:20128/
|
||||
curl -s -o /dev/null -w "AKAMAI: HTTP %{http_code}\n" http://69.164.221.35:20128/
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
@@ -21,8 +21,8 @@ This workflow fetches all open issues from the project's GitHub repository, clas
|
||||
|
||||
// turbo
|
||||
|
||||
- Run: `gh issue list --repo <owner>/<repo> --state open --limit 100 --json number,title,labels,body,comments,createdAt,author`
|
||||
- Parse the JSON output to get a list of all open issues
|
||||
- Run: `gh issue list --repo <owner>/<repo> --state open --limit 500 --json number,title,labels,body,comments,createdAt,author`
|
||||
- Parse the JSON output to get a list of **all** open issues (up to the `--limit 500` cap)
|
||||
- Sort by oldest first (FIFO)
|
||||
|
||||
### 3. Classify Each Issue
|
||||
|
||||
@@ -18,7 +18,11 @@ This workflow fetches all open PRs from the project's GitHub repository, perform
|
||||
|
||||
### 2. Fetch Open Pull Requests
|
||||
|
||||
- Navigate to `https://github.com/<owner>/<repo>/pulls` and scrape all open PRs
|
||||
// turbo
|
||||
|
||||
- Run: `gh pr list --repo <owner>/<repo> --state open --limit 500 --json number,title,author,headRefName,body,createdAt,additions,deletions,files`
|
||||
- This fetches **all** open PRs, up to the `--limit 500` cap. Get the diff for each with:
|
||||
`gh pr diff <NUMBER> --repo <owner>/<repo>`
|
||||
- For each open PR, collect:
|
||||
- PR number, title, author, branch, number of commits, date
|
||||
- PR description/body
|
||||
|
||||
@@ -3,6 +3,11 @@ data/
|
||||
**/data/
|
||||
**/db.json
|
||||
|
||||
# VS Code extension test runtime (large binary, not needed in npm package)
|
||||
app/vscode-extension/
|
||||
**/data/
|
||||
**/db.json
|
||||
|
||||
# Source code (pre-built app/ is published instead)
|
||||
src/
|
||||
open-sse/
|
||||
|
||||
@@ -4,6 +4,237 @@
|
||||
|
||||
---
|
||||
|
||||
## [2.7.2] — 2026-03-18
|
||||
|
||||
> Sprint: Light mode UI contrast fixes.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(logs)**: Fix light mode contrast in request logs filter buttons and combo badge (#378)
|
||||
- Error/Success/Combo filter buttons now readable in light mode
|
||||
- Combo row badge uses stronger violet in light mode
|
||||
|
||||
---
|
||||
|
||||
## [2.7.1] — 2026-03-17
|
||||
|
||||
> Sprint: Unified web search routing (POST /v1/search) with 5 providers + Next.js 16.1.7 security fixes (6 CVEs).
|
||||
|
||||
### ✨ New Features
|
||||
|
||||
- **feat(search)**: Unified web search routing — `POST /v1/search` with 5 providers (Serper, Brave, Perplexity, Exa, Tavily)
|
||||
- Auto-failover across providers, 6,500+ free searches/month
|
||||
- In-memory cache with request coalescing (configurable TTL)
|
||||
- Dashboard: Search Analytics tab in `/dashboard/analytics` with provider breakdown, cache hit rate, cost tracking
|
||||
- New API: `GET /api/v1/search/analytics` for search request statistics
|
||||
- DB migration: `request_type` column on `call_logs` for non-chat request tracking
|
||||
- Zod validation (`v1SearchSchema`), auth-gated, cost recorded via `recordCost()`
|
||||
|
||||
### 🔒 Security
|
||||
|
||||
- **deps**: Next.js 16.1.6 → 16.1.7 — fixes 6 CVEs:
|
||||
- **Critical**: CVE-2026-29057 (HTTP request smuggling via http-proxy)
|
||||
- **High**: CVE-2026-27977, CVE-2026-27978 (WebSocket + Server Actions)
|
||||
- **Medium**: CVE-2026-27979, CVE-2026-27980, CVE-2026-jcc7
|
||||
|
||||
### 📁 New Files
|
||||
|
||||
| File | Purpose |
|
||||
| ---------------------------------------------------------------- | ------------------------------------------ |
|
||||
| `open-sse/handlers/search.ts` | Search handler with 5-provider routing |
|
||||
| `open-sse/config/searchRegistry.ts` | Provider registry (auth, cost, quota, TTL) |
|
||||
| `open-sse/services/searchCache.ts` | In-memory cache with request coalescing |
|
||||
| `src/app/api/v1/search/route.ts` | Next.js route (POST + GET) |
|
||||
| `src/app/api/v1/search/analytics/route.ts` | Search stats API |
|
||||
| `src/app/(dashboard)/dashboard/analytics/SearchAnalyticsTab.tsx` | Analytics dashboard tab |
|
||||
| `src/lib/db/migrations/007_search_request_type.sql` | DB migration |
|
||||
| `tests/unit/search-registry.test.mjs` | 277 lines of unit tests |
|
||||
|
||||
---
|
||||
|
||||
## [2.7.0] — 2026-03-17
|
||||
|
||||
> Sprint: ClawRouter-inspired features — toolCalling flag, multilingual intent detection, benchmark-driven fallback, request deduplication, pluggable RouterStrategy, Grok-4 Fast + GLM-5 + MiniMax M2.5 + Kimi K2.5 pricing.
|
||||
|
||||
### ✨ New Models & Pricing
|
||||
|
||||
- **feat(pricing)**: xAI Grok-4 Fast — `$0.20/$0.50 per 1M tokens`, 1143ms p50 latency, tool calling supported
|
||||
- **feat(pricing)**: xAI Grok-4 (standard) — `$0.20/$1.50 per 1M tokens`, reasoning flagship
|
||||
- **feat(pricing)**: GLM-5 via Z.AI — `$0.5/1M`, 128K output context
|
||||
- **feat(pricing)**: MiniMax M2.5 — `$0.30/1M input`, reasoning + agentic tasks
|
||||
- **feat(pricing)**: DeepSeek V3.2 — updated pricing `$0.27/$1.10 per 1M`
|
||||
- **feat(pricing)**: Kimi K2.5 via Moonshot API — direct Moonshot API access
|
||||
- **feat(providers)**: Z.AI provider added (`zai` alias) — GLM-5 family with 128K output
|
||||
|
||||
### 🧠 Routing Intelligence
|
||||
|
||||
- **feat(registry)**: `toolCalling` flag per model in provider registry — combos can now prefer/require tool-calling capable models
|
||||
- **feat(scoring)**: Multilingual intent detection for AutoCombo scoring — PT/ZH/ES/AR script/language patterns influence model selection per request context
|
||||
- **feat(fallback)**: Benchmark-driven fallback chains — real latency data (p50 from `comboMetrics`) used to re-order fallback priority dynamically
|
||||
- **feat(dedup)**: Request deduplication via content-hash — 5-second idempotency window prevents duplicate provider calls from retrying clients
|
||||
- **feat(router)**: Pluggable `RouterStrategy` interface in `autoCombo/routerStrategy.ts` — custom routing logic can be injected without modifying core
|
||||
|
||||
### 🔧 MCP Server Improvements
|
||||
|
||||
- **feat(mcp)**: 2 new advanced tool schemas: `omniroute_get_provider_metrics` (p50/p95/p99 per provider) and `omniroute_explain_route` (routing decision explanation)
|
||||
- **feat(mcp)**: MCP tool auth scopes updated — `metrics:read` scope added for provider metrics tools
|
||||
- **feat(mcp)**: `omniroute_best_combo_for_task` now accepts `languageHint` parameter for multilingual routing
|
||||
|
||||
### 📊 Observability
|
||||
|
||||
- **feat(metrics)**: `comboMetrics.ts` extended with real-time latency percentile tracking per provider/account
|
||||
- **feat(health)**: Health API (`/api/monitoring/health`) now returns per-provider `p50Latency` and `errorRate` fields
|
||||
- **feat(usage)**: Usage history migration for per-model latency tracking
|
||||
|
||||
### 🗄️ DB Migrations
|
||||
|
||||
- **feat(migrations)**: New column `latency_p50` in `combo_metrics` table — zero-breaking, safe for existing users
|
||||
|
||||
### 🐛 Bug Fixes / Closures
|
||||
|
||||
- **close(#411)**: better-sqlite3 hashed module resolution on Windows — fixed in v2.6.10 (f02c5b5)
|
||||
- **close(#409)**: GitHub Copilot chat completions fail with Claude models when files attached — fixed in v2.6.9 (838f1d6)
|
||||
- **close(#405)**: Duplicate of #411 — resolved
|
||||
|
||||
## [2.6.10] — 2026-03-17
|
||||
|
||||
> Windows fix: better-sqlite3 prebuilt download without node-gyp/Python/MSVC (#426).
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(install/#426)**: On Windows, `npm install -g omniroute` used to fail with `better_sqlite3.node is not a valid Win32 application` because the bundled native binary was compiled for Linux. Adds **Strategy 1.5** to `scripts/postinstall.mjs`: uses `@mapbox/node-pre-gyp install --fallback-to-build=false` (bundled within `better-sqlite3`) to download the correct prebuilt binary for the current OS/arch without requiring any build tools (no node-gyp, no Python, no MSVC). Falls back to `npm rebuild` only if the download fails. Adds platform-specific error messages with clear manual fix instructions.
|
||||
|
||||
---
|
||||
|
||||
## [2.6.9] — 2026-03-17
|
||||
|
||||
> CI fixes (t11 any-budget), bug fix #409 (file attachments via Copilot+Claude), release workflow correction.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(ci)**: Remove the word "any" from comments in `openai-responses.ts` and `chatCore.ts` that were failing the t11 `\bany\b` budget check (false positive from regex counting comments)
|
||||
- **fix(chatCore)**: Normalize unsupported content part types before forwarding to providers (#409 — Cursor sends `{type:"file"}` when `.md` files are attached; Copilot and other OpenAI-compat providers reject with "type has to be either 'image_url' or 'text'"; fix converts `file`/`document` blocks to `text` and drops unknown types)
|
||||
|
||||
### 🔧 Workflow
|
||||
|
||||
- **chore(generate-release)**: Add ATOMIC COMMIT RULE — version bump (`npm version patch`) MUST happen before committing feature files to ensure the tag always points to a commit containing all version changes together
|
||||
|
||||
---
|
||||
|
||||
## [2.6.8] — 2026-03-17
|
||||
|
||||
> Sprint: Combo as Agent (system prompt + tool filter), Context Caching Protection, Auto-Update, Detailed Logs, MITM Kiro IDE.
|
||||
|
||||
### 🗄️ DB Migrations (zero-breaking — safe for existing users)
|
||||
|
||||
- **005_combo_agent_fields.sql**: `ALTER TABLE combos ADD COLUMN system_message TEXT DEFAULT NULL`, `tool_filter_regex TEXT DEFAULT NULL`, `context_cache_protection INTEGER DEFAULT 0`
|
||||
- **006_detailed_request_logs.sql**: New `request_detail_logs` table with 500-entry ring-buffer trigger, opt-in via settings toggle
|
||||
|
||||
### ✨ Features
|
||||
|
||||
- **feat(combo)**: System Message Override per Combo (#399 — `system_message` field replaces or injects system prompt before forwarding to provider)
|
||||
- **feat(combo)**: Tool Filter Regex per Combo (#399 — `tool_filter_regex` keeps only tools matching pattern; supports OpenAI + Anthropic formats)
|
||||
- **feat(combo)**: Context Caching Protection (#401 — `context_cache_protection` tags responses with `<omniModel>provider/model</omniModel>` and pins model for session continuity)
|
||||
- **feat(settings)**: Auto-Update via Settings (#320 — `GET /api/system/version` + `POST /api/system/update` — checks npm registry and updates in background with pm2 restart)
|
||||
- **feat(logs)**: Detailed Request Logs (#378 — captures full pipeline bodies at 4 stages: client request, translated request, provider response, client response — opt-in toggle, 64KB trim, 500-entry ring-buffer)
|
||||
- **feat(mitm)**: MITM Kiro IDE profile (#336 — `src/mitm/targets/kiro.ts` targets api.anthropic.com, reuses existing MITM infrastructure)
|
||||
|
||||
---
|
||||
|
||||
## [2.6.7] — 2026-03-17
|
||||
|
||||
> Sprint: SSE improvements, local provider_nodes extensions, proxy registry, Claude passthrough fixes.
|
||||
|
||||
### ✨ Features
|
||||
|
||||
- **feat(health)**: Background health check for local `provider_nodes` with exponential backoff (30s→300s) and `Promise.allSettled` to avoid blocking (#423, @Regis-RCR)
|
||||
- **feat(embeddings)**: Route `/v1/embeddings` to local `provider_nodes` — `buildDynamicEmbeddingProvider()` with hostname validation (#422, @Regis-RCR)
|
||||
- **feat(audio)**: Route TTS/STT to local `provider_nodes` — `buildDynamicAudioProvider()` with SSRF protection (#416, @Regis-RCR)
|
||||
- **feat(proxy)**: Proxy registry, management APIs, and quota-limit generalization (#429, @Regis-RCR)
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(sse)**: Strip Claude-specific fields (`metadata`, `anthropic_version`) when target is OpenAI-compat (#421, @prakersh)
|
||||
- **fix(sse)**: Extract Claude SSE usage (`input_tokens`, `output_tokens`, cache tokens) in passthrough stream mode (#420, @prakersh)
|
||||
- **fix(sse)**: Generate fallback `call_id` for tool calls with missing/empty IDs (#419, @prakersh)
|
||||
- **fix(sse)**: Claude-to-Claude passthrough — forward body completely untouched, no re-translation (#418, @prakersh)
|
||||
- **fix(sse)**: Filter orphaned `tool_result` items after Claude Code context compaction to avoid 400 errors (#417, @prakersh)
|
||||
- **fix(sse)**: Skip empty-name tool calls in Responses API translator to prevent `placeholder_tool` infinite loops (#415, @prakersh)
|
||||
- **fix(sse)**: Strip empty text content blocks before translation (#427, @prakersh)
|
||||
- **fix(api)**: Add `refreshable: true` to Claude OAuth test config (#428, @prakersh)
|
||||
|
||||
### 📦 Dependencies
|
||||
|
||||
- Bump `vitest`, `@vitest/*` and related devDependencies (#414, @dependabot)
|
||||
|
||||
---
|
||||
|
||||
## [2.6.6] — 2026-03-17
|
||||
|
||||
> Hotfix: Turbopack/Docker compatibility — remove `node:` protocol from all `src/` imports.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(build)**: Removed `node:` protocol prefix from `import` statements in 17 files under `src/`. The `node:fs`, `node:path`, `node:url`, `node:os` etc. imports caused `Ecmascript file had an error` on Turbopack builds (Next.js 15 Docker) and on upgrades from older npm global installs. Affected files: `migrationRunner.ts`, `core.ts`, `backup.ts`, `prompts.ts`, `dataPaths.ts`, and 12 others in `src/app/api/` and `src/lib/`.
|
||||
- **chore(workflow)**: Updated `generate-release.md` to make Docker Hub sync and dual-VPS deploy **mandatory** steps in every release.
|
||||
|
||||
---
|
||||
|
||||
## [2.6.5] — 2026-03-17
|
||||
|
||||
> Sprint: reasoning model param filtering, local provider 404 fix, Kilo Gateway provider, dependency bumps.
|
||||
|
||||
### ✨ New Features
|
||||
|
||||
- **feat(api)**: Added **Kilo Gateway** (`api.kilo.ai`) as a new API Key provider (alias `kg`) — 335+ models, 6 free models, 3 auto-routing models (`kilo-auto/frontier`, `kilo-auto/balanced`, `kilo-auto/free`). Passthrough models supported via `/api/gateway/models` endpoint. (PR #408 by @Regis-RCR)
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(sse)**: Strip unsupported parameters for reasoning models (o1, o1-mini, o1-pro, o3, o3-mini). Models in the `o1`/`o3` family reject `temperature`, `top_p`, `frequency_penalty`, `presence_penalty`, `logprobs`, `top_logprobs`, and `n` with HTTP 400. Parameters are now stripped at the `chatCore` layer before forwarding. Uses a declarative `unsupportedParams` field per model and a precomputed O(1) Map for lookup. (PR #412 by @Regis-RCR)
|
||||
- **fix(sse)**: Local provider 404 now results in a **model-only lockout (5 seconds)** instead of a connection-level lockout (2 minutes). When a local inference backend (Ollama, LM Studio, oMLX) returns 404 for an unknown model, the connection remains active and other models continue working immediately. Also fixes a pre-existing bug where `model` was not passed to `markAccountUnavailable()`. Local providers detected via hostname (`localhost`, `127.0.0.1`, `::1`, extensible via `LOCAL_HOSTNAMES` env var). (PR #410 by @Regis-RCR)
|
||||
|
||||
### 📦 Dependencies
|
||||
|
||||
- `better-sqlite3` 12.6.2 → 12.8.0
|
||||
- `undici` 7.24.2 → 7.24.4
|
||||
- `https-proxy-agent` 7 → 8
|
||||
- `agent-base` 7 → 8
|
||||
|
||||
---
|
||||
|
||||
## [2.6.4] — 2026-03-17
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(providers)**: Removed non-existent model names across 5 providers:
|
||||
- **gemini / gemini-cli**: removed `gemini-3.1-pro/flash` and `gemini-3-*-preview` (don't exist in Google API v1beta); replaced with `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-1.5-pro/flash`
|
||||
- **antigravity**: removed `gemini-3.1-pro-high/low` and `gemini-3-flash` (invalid internal aliases); replaced with real 2.x models
|
||||
- **github (Copilot)**: removed `gemini-3-flash-preview` and `gemini-3-pro-preview`; replaced with `gemini-2.5-flash`
|
||||
- **nvidia**: corrected `nvidia/llama-3.3-70b-instruct` → `meta/llama-3.3-70b-instruct` (NVIDIA NIM uses `meta/` namespace for Meta models); added `nvidia/llama-3.1-70b-instruct` and `nvidia/llama-3.1-405b-instruct`
|
||||
- **fix(db/combo)**: Updated `free-stack` combo on remote DB: removed `qw/qwen3-coder-plus` (expired refresh token), corrected `nvidia/llama-3.3-70b-instruct` → `nvidia/meta/llama-3.3-70b-instruct`, corrected `gemini/gemini-3.1-flash` → `gemini/gemini-2.5-flash`, added `if/deepseek-v3.2`
|
||||
|
||||
---
|
||||
|
||||
## [2.6.3] — 2026-03-16
|
||||
|
||||
> Sprint: zod/pino hash-strip baked into build pipeline, Synthetic provider added, VPS PM2 path corrected.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **fix(build)**: Turbopack hash-strip now runs at **compile time** for ALL packages — not just `better-sqlite3`. Step 5.6 in `prepublish.mjs` walks every `.js` in `app/.next/server/` and strips the 16-char hex suffix from any hashed `require()`. Fixes `zod-dcb22c...`, `pino-...`, etc. MODULE_NOT_FOUND on global npm installs. Closes #398
|
||||
- **fix(deploy)**: PM2 on both VPS was pointing to stale git-clone directories. Reconfigured to `app/server.js` in the npm global package. Updated `/deploy-vps` workflow to use `npm pack + scp` (npm registry rejects 299MB packages).
|
||||
|
||||
### ✨ Features
|
||||
|
||||
- **feat(provider)**: Synthetic ([synthetic.new](https://synthetic.new)) — privacy-focused OpenAI-compatible inference. `passthroughModels: true` for dynamic HuggingFace model catalog. Initial models: Kimi K2.5, MiniMax M2.5, GLM 4.7, DeepSeek V3.2. (PR #404 by @Regis-RCR)
|
||||
|
||||
### 📋 Issues Closed
|
||||
|
||||
- **close #398**: npm hash regression — fixed by compile-time hash-strip in prepublish
|
||||
- **triage #324**: Bug screenshot without steps — requested reproduction details
|
||||
|
||||
---
|
||||
|
||||
## [2.6.2] — 2026-03-16
|
||||
|
||||
> Sprint: module hashing fully fixed, 2 PRs merged (Anthropic tools filter + custom endpoint paths), Alibaba Cloud DashScope provider added, 3 stale issues closed.
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
_Your universal API proxy — one endpoint, 44+ providers, zero downtime. Now with **MCP & A2A** agent orchestration._
|
||||
|
||||
**Chat Completions • Embeddings • Image Generation • Video • Music • Audio • Reranking • MCP Server • A2A Protocol • 100% TypeScript**
|
||||
**Chat Completions • Embeddings • Image Generation • Video • Music • Audio • Reranking • Web Search • MCP Server • A2A Protocol • 100% TypeScript**
|
||||
|
||||
---
|
||||
|
||||
@@ -898,27 +898,44 @@ When minimized, OmniRoute lives in your system tray with quick actions:
|
||||
|
||||
## 💰 Pricing at a Glance
|
||||
|
||||
| Tier | Provider | Cost | Quota Reset | Best For |
|
||||
| ------------------- | ----------------- | ---------------------- | ---------------- | ----------------------- |
|
||||
| **💳 SUBSCRIPTION** | Claude Code (Pro) | $20/mo | 5h + weekly | Already subscribed |
|
||||
| | Codex (Plus/Pro) | $20-200/mo | 5h + weekly | OpenAI users |
|
||||
| | Gemini CLI | **FREE** | 180K/mo + 1K/day | Everyone! |
|
||||
| | GitHub Copilot | $10-19/mo | Monthly | GitHub users |
|
||||
| **🔑 API KEY** | NVIDIA NIM | **FREE** (dev forever) | ~40 RPM | 70+ open models |
|
||||
| | Cerebras | **FREE** (1M tok/day) | 60K TPM / 30 RPM | World's fastest |
|
||||
| | Groq | **FREE** (30 RPM) | 14.4K RPD | Ultra-fast Llama/Gemma |
|
||||
| | DeepSeek | Pay-per-use | None | Best price/quality |
|
||||
| | xAI (Grok) | Pay-per-use | None | Grok models |
|
||||
| | Mistral | Free trial + paid | Rate limited | European AI |
|
||||
| | OpenRouter | Pay-per-use | None | 100+ models aggr. |
|
||||
| **💰 CHEAP** | GLM-4.7 | $0.6/1M | Daily 10AM | Budget backup |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5-hour rolling | Cheapest option |
|
||||
| | Kimi K2 | $9/mo flat | 10M tokens/mo | Predictable cost |
|
||||
| **🆓 FREE** | iFlow | **$0** | Unlimited | 5 models unlimited |
|
||||
| | Qwen | **$0** | Unlimited | 4 models unlimited |
|
||||
| | Kiro | **$0** | Unlimited | Claude (AWS Builder ID) |
|
||||
| Tier | Provider | Cost | Quota Reset | Best For |
|
||||
| ------------------- | --------------------------- | ------------------------- | ---------------- | --------------------------------- |
|
||||
| **💳 SUBSCRIPTION** | Claude Code (Pro) | $20/mo | 5h + weekly | Already subscribed |
|
||||
| | Codex (Plus/Pro) | $20-200/mo | 5h + weekly | OpenAI users |
|
||||
| | Gemini CLI | **FREE** | 180K/mo + 1K/day | Everyone! |
|
||||
| | GitHub Copilot | $10-19/mo | Monthly | GitHub users |
|
||||
| **🔑 API KEY** | NVIDIA NIM | **FREE** (dev forever) | ~40 RPM | 70+ open models |
|
||||
| | Cerebras | **FREE** (1M tok/day) | 60K TPM / 30 RPM | World's fastest |
|
||||
| | Groq | **FREE** (30 RPM) | 14.4K RPD | Ultra-fast Llama/Gemma |
|
||||
| | DeepSeek V3.2 | $0.27/$1.10 per 1M | None | Best price/quality reasoning |
|
||||
| | xAI Grok-4 Fast | **$0.20/$0.50 per 1M** 🆕 | None | Fastest + tool calling, ultralow |
|
||||
| | xAI Grok-4 (standard) | $0.20/$1.50 per 1M 🆕 | None | Reasoning flagship from xAI |
|
||||
| | Mistral | Free trial + paid | Rate limited | European AI |
|
||||
| | OpenRouter | Pay-per-use | None | 100+ models aggr. |
|
||||
| **💰 CHEAP** | GLM-5 (via Z.AI) 🆕 | $0.5/1M | Daily 10AM | 128K output, newest flagship |
|
||||
| | GLM-4.7 | $0.6/1M | Daily 10AM | Budget backup |
|
||||
| | MiniMax M2.5 🆕 | $0.3/1M input | 5-hour rolling | Reasoning + agentic tasks |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5-hour rolling | Cheapest option |
|
||||
| | Kimi K2.5 (Moonshot API) 🆕 | Pay-per-use | None | Direct Moonshot API access |
|
||||
| | Kimi K2 | $9/mo flat | 10M tokens/mo | Predictable cost |
|
||||
| **🆓 FREE** | iFlow | **$0** | Unlimited | 5 models unlimited |
|
||||
| | Qwen | **$0** | Unlimited | 4 models unlimited |
|
||||
| | Kiro | **$0** | Unlimited | Claude Sonnet/Haiku (AWS Builder) |
|
||||
|
||||
**💡 $0 Combo Stack:** Gemini CLI (180K/mo) → iFlow (unlimited: kimi-k2-thinking, qwen3-coder-plus, deepseek-r1) → Kiro (Claude for free) → Qwen (4 models, unlimited) — **Zero cost, never stops coding.** When Gemini quota runs out, OmniRoute auto-falls back to iFlow or Kiro with zero config.
|
||||
> 🆕 **New models added (Mar 2026):** Grok-4 Fast family at $0.20/$0.50/M (benchmarked at 1143ms — 30% faster than Gemini 2.5 Flash), GLM-5 via Z.AI with 128K output, MiniMax M2.5 reasoning, DeepSeek V3.2 updated pricing, Kimi K2.5 via Moonshot direct API.
|
||||
|
||||
**💡 $0 Combo Stack — The Complete Free Setup:**
|
||||
|
||||
```
|
||||
Gemini CLI (180K/mo free)
|
||||
→ iFlow (unlimited: kimi-k2-thinking, qwen3-coder-plus, deepseek-r1)
|
||||
→ Kiro (Claude Sonnet 4.5 + Haiku — unlimited, via AWS Builder ID)
|
||||
→ Qwen (4 models — unlimited)
|
||||
→ Groq (14.4K req/day — ultra-fast)
|
||||
→ NVIDIA NIM (70+ models — 40 RPM forever)
|
||||
```
|
||||
|
||||
**Zero cost. Never stops coding.** Configure this as one OmniRoute combo and all fallbacks happen automatically — no manual switching ever.
|
||||
|
||||
---
|
||||
|
||||
@@ -1027,7 +1044,20 @@ Then in `/dashboard/media` → **Transcription** tab: upload any audio or video
|
||||
|
||||
OmniRoute v2.0 is built as an operational platform, not just a relay proxy.
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
### 🆕 New — ClawRouter-Inspired Improvements (Mar 2026)
|
||||
|
||||
| Feature | What It Does |
|
||||
| ------------------------------------ | ------------------------------------------------------------------------------------------- |
|
||||
| ⚡ **Grok-4 Fast Family** | xAI models at $0.20/$0.50/M — benchmarked 1143ms (30% faster than Gemini 2.5 Flash) |
|
||||
| 🧠 **GLM-5 via Z.AI** | 128K output context, $0.5/1M — newest flagship from the GLM family |
|
||||
| 🔮 **MiniMax M2.5** | Reasoning + agentic tasks at $0.30/1M — significant upgrade from M2.1 |
|
||||
| 🎯 **toolCalling Flag per Model** | Per-model `toolCalling: true/false` in registry — AutoCombo skips non-tool-capable models |
|
||||
| 🌍 **Multilingual Intent Detection** | PT/ZH/ES/AR keywords in AutoCombo scoring — better model selection for non-English content |
|
||||
| 📊 **Benchmark-Driven Fallbacks** | Real p95 latency from live requests feeds combo scoring — AutoCombo learns from actual data |
|
||||
| 🔁 **Request Deduplication** | Content-hash based dedup window — multi-agent safe, prevents duplicate charges |
|
||||
| 🔌 **Pluggable RouterStrategy** | Extensible `RouterStrategy` interface — add custom routing logic as plugins |
|
||||
|
||||
### 🚀 Previous v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
| ------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
@@ -1075,16 +1105,17 @@ OmniRoute v2.0 is built as an operational platform, not just a relay proxy.
|
||||
|
||||
### 🎵 Multi-Modal APIs
|
||||
|
||||
| Feature | What It Does |
|
||||
| -------------------------- | ------------------------------------------------------------- |
|
||||
| 🖼️ **Image Generation** | `/v1/images/generations` with cloud and local backends |
|
||||
| 📐 **Embeddings** | `/v1/embeddings` for search and RAG pipelines |
|
||||
| 🎤 **Audio Transcription** | `/v1/audio/transcriptions` (Whisper and additional providers) |
|
||||
| 🔊 **Text-to-Speech** | `/v1/audio/speech` (multiple engines/providers) |
|
||||
| 🎬 **Video Generation** | `/v1/videos/generations` (ComfyUI + SD WebUI workflows) |
|
||||
| 🎵 **Music Generation** | `/v1/music/generations` (ComfyUI workflows) |
|
||||
| 🛡️ **Moderations** | `/v1/moderations` safety checks |
|
||||
| 🔀 **Reranking** | `/v1/rerank` for relevance scoring |
|
||||
| Feature | What It Does |
|
||||
| -------------------------- | ------------------------------------------------------------------------------------------------------------ |
|
||||
| 🖼️ **Image Generation** | `/v1/images/generations` with cloud and local backends |
|
||||
| 📐 **Embeddings** | `/v1/embeddings` for search and RAG pipelines |
|
||||
| 🎤 **Audio Transcription** | `/v1/audio/transcriptions` (Whisper and additional providers) |
|
||||
| 🔊 **Text-to-Speech** | `/v1/audio/speech` (multiple engines/providers) |
|
||||
| 🎬 **Video Generation** | `/v1/videos/generations` (ComfyUI + SD WebUI workflows) |
|
||||
| 🎵 **Music Generation** | `/v1/music/generations` (ComfyUI workflows) |
|
||||
| 🛡️ **Moderations** | `/v1/moderations` safety checks |
|
||||
| 🔀 **Reranking** | `/v1/rerank` for relevance scoring |
|
||||
| 🔍 **Web Search** 🆕 | `/v1/search` — 5 providers (Serper, Brave, Perplexity, Exa, Tavily), 6,500+ free/month, auto-failover, cache |
|
||||
|
||||
### 🛡️ Resilience, Security & Governance
|
||||
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
# ADR-0001: Proxy Registry + Usage Control Generalization
|
||||
|
||||
Date: 2026-03-17
|
||||
Status: Accepted
|
||||
|
||||
## Context
|
||||
|
||||
OmniRoute sudah punya:
|
||||
|
||||
- Proxy assignment berbasis config-map (`global`, `providers`, `combos`, `keys`).
|
||||
- Quota-aware selection khusus provider tertentu (notably `codex`).
|
||||
|
||||
Gap utama:
|
||||
|
||||
- Proxy belum menjadi aset reusable yang bisa di-manage sebagai entitas (metadata, where-used, safe delete).
|
||||
- Usage policy belum konsisten lintas provider.
|
||||
- Error contract API belum seragam untuk endpoint manajemen.
|
||||
|
||||
## Decision
|
||||
|
||||
1. Tambah **Proxy Registry** sebagai domain baru di DB (`proxy_registry`, `proxy_assignments`).
|
||||
2. Pertahankan kompatibilitas assignment lama (fallback ke `proxyConfig` lama).
|
||||
3. Resolver runtime pakai prioritas:
|
||||
- account -> provider -> global (registry)
|
||||
- fallback ke legacy resolver jika registry belum ada assignment
|
||||
4. Wajib redaction kredensial di output list registry default.
|
||||
5. Standarkan error JSON untuk endpoint manajemen proxy agar konsisten dan punya `requestId`.
|
||||
|
||||
## Consequences
|
||||
|
||||
Positif:
|
||||
|
||||
- Proxy reusable dan bisa dilacak pemakaiannya.
|
||||
- Safe delete bisa ditegakkan (409 saat masih dipakai).
|
||||
- Migrasi bertahap tanpa breaking change runtime.
|
||||
|
||||
Negatif:
|
||||
|
||||
- Ada dual-source sementara (registry + legacy config) sampai migrasi selesai.
|
||||
- Butuh endpoint assignment tambahan dan pemetaan scope yang konsisten.
|
||||
|
||||
## Follow-up
|
||||
|
||||
- Migrasi UI provider/account dari input raw proxy ke selector registry.
|
||||
- Tambah health telemetry per proxy dan alerting.
|
||||
- Generalisasi usage control ke provider lain melalui interface policy yang sama.
|
||||
@@ -0,0 +1,32 @@
|
||||
# ADR-0002: Error Contract for Management Endpoints
|
||||
|
||||
Date: 2026-03-17
|
||||
Status: Accepted
|
||||
|
||||
## Decision
|
||||
|
||||
Management endpoints (proxy config, proxy registry, and proxy assignments) return a uniform error body:
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"message": "Human-readable summary",
|
||||
"type": "invalid_request | not_found | conflict | server_error",
|
||||
"details": {}
|
||||
},
|
||||
"requestId": "uuid"
|
||||
}
|
||||
```
|
||||
|
||||
## Status Mapping
|
||||
|
||||
- 400: invalid request / validation failure
|
||||
- 404: resource not found
|
||||
- 409: resource conflict (for example, proxy still assigned)
|
||||
- 500: unexpected server error
|
||||
|
||||
## Notes
|
||||
|
||||
- `requestId` is mandatory for log correlation.
|
||||
- `details` is optional and carries only validation information that is safe to expose.
|
||||
- Sensitive secrets (proxy credentials, tokens) must never appear in `message` or `details`.
|
||||
@@ -0,0 +1,16 @@
|
||||
# ADR-0003: Security Checklist for Proxy Registry and Usage Controls
|
||||
|
||||
Date: 2026-03-17
|
||||
Status: Accepted
|
||||
|
||||
## Checklist
|
||||
|
||||
- Validate all management payloads with Zod.
|
||||
- Reject malformed scope assignment updates with status 400.
|
||||
- Reject deleting an in-use proxy with status 409 unless forced.
|
||||
- Never expose proxy username/password in list responses by default.
|
||||
- Never log raw credentials or token values.
|
||||
- Keep error responses free from internal stack traces.
|
||||
- Protect management endpoints with existing auth middleware policy.
|
||||
- Audit mutating operations: create/update/delete/assign/migrate.
|
||||
- Ensure the resolver falls back to the legacy config while the migration is in progress.
|
||||
@@ -8,6 +8,16 @@ _وكيل API العالمي الخاص بك - نقطة نهاية واحدة،
|
||||
|
||||
---
|
||||
|
||||
### 🆕 الجديد في v2.7.0
|
||||
|
||||
- **RouterStrategy قابل للتوصيل** — استراتيجيات القواعد والتكلفة والكمون
|
||||
- **كشف النية متعدد اللغات** — تسجيل التوجيه بأكثر من 30 لغة
|
||||
- **إلغاء تكرار الطلبات** — تجنب استدعاءات API المكررة عبر تجزئة المحتوى
|
||||
- **مزودون جدد:** Grok-4 Fast (xAI) وGLM-5 / Z.AI وMiniMax M2.5 وKimi K2.5
|
||||
- **أسعار محدثة:** Grok-4 Fast $0.20/$0.50/M، GLM-5 $0.50/M، MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://www.npmjs.com/package/omniroute)
|
||||
|
||||
@@ -8,6 +8,16 @@ _Вашият универсален API прокси — една крайна
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://www.npmjs.com/package/omniroute)
|
||||
|
||||
@@ -8,6 +8,16 @@ _Din universelle API-proxy — ét slutpunkt, 36+ udbydere, ingen nedetid. Nu me
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://www.npmjs.com/package/omniroute)
|
||||
|
||||
@@ -8,6 +8,16 @@ _Ihr universeller API-Proxy – ein Endpunkt, mehr als 36 Anbieter, keine Ausfal
|
||||
|
||||
---
|
||||
|
||||
### 🆕 Neu in v2.7.0
|
||||
|
||||
- **Erweiterbare RouterStrategy** — Regeln-, Kosten- und Latenzstrategien
|
||||
- **Mehrsprachige Absichtserkennung** — Routing-Scoring in 30+ Sprachen
|
||||
- **Anfrage-Deduplizierung** — doppelte API-Aufrufe per Content-Hash vermeiden
|
||||
- **Neue Anbieter:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Aktualisierte Preise:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://www.npmjs.com/package/omniroute)
|
||||
|
||||
@@ -11,6 +11,16 @@ _Tu proxy de API universal — un endpoint, 36+ proveedores, cero tiempo de inac
|
||||
|
||||
---
|
||||
|
||||
### 🆕 Novedades en v2.7.0
|
||||
|
||||
- **RouterStrategy enchufable** — estrategias de reglas, costo y latencia
|
||||
- **Detección de intención multilingüe** — puntuación de enrutamiento en 30+ idiomas
|
||||
- **Deduplicación de solicitudes** — evita llamadas duplicadas por hash de contenido
|
||||
- **Nuevos proveedores:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Precios actualizados:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Universaali API-välityspalvelin – yksi päätepiste, yli 36 palveluntarjoaja
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Votre proxy API universel — un endpoint, 36+ fournisseurs, zéro temps d'arr
|
||||
|
||||
---
|
||||
|
||||
### 🆕 Nouveautés dans v2.7.0
|
||||
|
||||
- **RouterStrategy extensible** — stratégies de règles, coût et latence
|
||||
- **Détection d'intention multilingue** — scoring de routage en 30+ langues
|
||||
- **Déduplication des requêtes** — évite les appels dupliqués via hash de contenu
|
||||
- **Nouveaux fournisseurs :** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Tarifs mis à jour :** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _שרת ה-API האוניברסלי שלך - נקודת קצה אחת, 36+ ספ
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Az univerzális API-proxy – egy végpont, 36+ szolgáltató, nulla állásid
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Proksi API universal Anda — satu titik akhir, 36+ penyedia, tanpa waktu henti
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -13,6 +13,16 @@ _आपका सार्वभौमिक एपीआई प्रॉक्
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Il tuo proxy API universale — un endpoint, 36+ provider, zero downtime._
|
||||
|
||||
---
|
||||
|
||||
### 🆕 Novità in v2.7.0
|
||||
|
||||
- **RouterStrategy estensibile** — strategie per regole, costo e latenza
|
||||
- **Rilevamento intento multilingue** — scoring di routing in 30+ lingue
|
||||
- **Deduplicazione richieste** — evita chiamate duplicate tramite hash del contenuto
|
||||
- **Nuovi provider:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Prezzi aggiornati:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _ユニバーサル API プロキシ — 1 つのエンドポイント、36 以
|
||||
|
||||
---
|
||||
|
||||
### 🆕 v2.7.0 の新機能
|
||||
|
||||
- **プラガブル RouterStrategy** — ルール・コスト・レイテンシ戦略をサポート
|
||||
- **多言語インテント検出** — 30以上の言語でルーティングスコアリング
|
||||
- **リクエスト重複排除** — コンテンツハッシュで重複 API 呼び出しを防止
|
||||
- **新しいプロバイダー:** Grok-4 Fast (xAI)、GLM-5 / Z.AI、MiniMax M2.5、Kimi K2.5
|
||||
- **価格更新:** Grok-4 Fast $0.20/$0.50/M、GLM-5 $0.50/M、MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _범용 API 프록시 — 하나의 엔드포인트, 36개 이상의 공급자,
|
||||
|
||||
---
|
||||
|
||||
### 🆕 v2.7.0 새로운 기능
|
||||
|
||||
- **플러그형 RouterStrategy** — 규칙, 비용, 지연 전략 지원
|
||||
- **다국어 의도 감지** — 30개 이상 언어로 라우팅 스코어링
|
||||
- **요청 중복 제거** — 콘텐츠 해시로 중복 API 호출 방지
|
||||
- **새 공급자:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **가격 업데이트:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Proksi API universal anda — satu titik akhir, 36+ pembekal, masa henti sifar.
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Uw universele API-proxy: één eindpunt, meer dan 36 providers, geen downtime._
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Din universelle API-proxy – ett endepunkt, 36+ leverandører, null nedetid._
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Iyong unibersal na API proxy — isang endpoint, 36+ provider, zero downtime._
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Twój uniwersalny serwer proxy API — jeden punkt końcowy, ponad 36 dostawcó
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Seu proxy de API universal — um endpoint, 36+ provedores, zero tempo de inati
|
||||
|
||||
---
|
||||
|
||||
### 🆕 Novidades na v2.7.0
|
||||
|
||||
- **RouterStrategy plugável** — estratégias de regras, custo e latência
|
||||
- **Detecção de intenção multilíngue** — scoring de roteamento em 30+ idiomas
|
||||
- **Deduplicação de requisições** — evita chamadas duplicadas por hash de conteúdo
|
||||
- **Novos provedores:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Preços atualizados:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Seu proxy de API universal — um endpoint, mais de 36 provedores, tempo de ina
|
||||
|
||||
---
|
||||
|
||||
### 🆕 Novidades na v2.7.0
|
||||
|
||||
- **RouterStrategy extensível** — estratégias de regras, custo e latência
|
||||
- **Deteção de intenção multilíngue** — scoring de encaminhamento em 30+ idiomas
|
||||
- **Deduplicação de pedidos** — evita chamadas duplicadas por hash de conteúdo
|
||||
- **Novos fornecedores:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Preços atualizados:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Proxy-ul dvs. universal API - un punct final, peste 36 de furnizori, zero timpi
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Ваш универсальный API-прокси — одна точка до
|
||||
|
||||
---
|
||||
|
||||
### 🆕 Новое в v2.7.0
|
||||
|
||||
- **Подключаемая RouterStrategy** — стратегии по правилам, стоимости и задержке
|
||||
- **Многоязычное распознавание намерений** — маршрутизация на 30+ языках
|
||||
- **Дедупликация запросов** — устранение дублей по хэшу содержимого
|
||||
- **Новые провайдеры:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Обновлённые цены:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Váš univerzálny proxy server API – jeden koncový bod, 36+ poskytovateľov
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Din universella API-proxy — en slutpunkt, 36+ leverantörer, noll driftstopp.
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _พร็อกซี API สากลของคุณ — จุดสิ้
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Ваш універсальний API-проксі — одна кінцева
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _Proxy API phổ quát của bạn — một điểm cuối, hơn 36 nhà cung c
|
||||
|
||||
---
|
||||
|
||||
### 🆕 What's New in v2.7.0
|
||||
|
||||
- **Pluggable RouterStrategy** — rules, cost, and latency routing strategies
|
||||
- **Multilingual intent detection** — routing scoring in 30+ languages
|
||||
- **Request deduplication** — prevent duplicate API calls via content hash
|
||||
- **New providers:** Grok-4 Fast (xAI), GLM-5 / Z.AI, MiniMax M2.5, Kimi K2.5
|
||||
- **Updated pricing:** Grok-4 Fast $0.20/$0.50/M, GLM-5 $0.50/M, MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -11,6 +11,16 @@ _您的通用 API 代理 — 一个端点,36+ 提供商,零停机时间。_
|
||||
|
||||
---
|
||||
|
||||
### 🆕 v2.7.0 新功能
|
||||
|
||||
- **可插拔 RouterStrategy** — 支持规则、成本和延迟策略
|
||||
- **多语言意图检测** — 支持 30+ 语言的路由评分
|
||||
- **请求去重** — 基于内容哈希避免重复 API 调用
|
||||
- **新增提供商:** Grok-4 Fast (xAI)、GLM-5 / Z.AI、MiniMax M2.5、Kimi K2.5
|
||||
- **价格更新:** Grok-4 Fast $0.20/$0.50/M,GLM-5 $0.50/M,MiniMax M2.5 $0.30/M
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
openapi: 3.1.0
|
||||
info:
|
||||
title: OmniRoute API
|
||||
version: 2.6.2
|
||||
version: 2.7.2
|
||||
description: |
|
||||
OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
|
||||
endpoint that routes requests to multiple AI providers with load balancing,
|
||||
|
||||
@@ -11,7 +11,7 @@ interface AudioModel {
|
||||
name: string;
|
||||
}
|
||||
|
||||
interface AudioProvider {
|
||||
export interface AudioProvider {
|
||||
id: string;
|
||||
baseUrl: string;
|
||||
authType: string;
|
||||
@@ -262,36 +262,74 @@ export function getSpeechProvider(providerId: string): AudioProvider | null {
|
||||
return AUDIO_SPEECH_PROVIDERS[providerId] || null;
|
||||
}
|
||||
|
||||
export interface ProviderNodeRow {
|
||||
prefix: string;
|
||||
name: string;
|
||||
baseUrl: string;
|
||||
apiType?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse audio model string (format: "provider/model" or just "model")
|
||||
* Build a dynamic AudioProvider from a provider_node DB entry.
|
||||
* Only used for local providers (localhost/127.0.0.1) — remote nodes are
|
||||
* excluded by the caller to prevent auth bypass and SSRF.
|
||||
*/
|
||||
export function buildDynamicAudioProvider(node: ProviderNodeRow, audioPath: string): AudioProvider {
|
||||
if (!node.prefix || !node.baseUrl) {
|
||||
throw new Error(`Invalid provider_node: missing prefix or baseUrl`);
|
||||
}
|
||||
const baseUrl = node.baseUrl.replace(/\/+$/, "");
|
||||
return {
|
||||
id: node.prefix,
|
||||
baseUrl: `${baseUrl}${audioPath}`,
|
||||
authType: "none",
|
||||
authHeader: "none",
|
||||
models: [],
|
||||
};
|
||||
}
|
||||
|
||||
function parseAudioModel(
|
||||
modelStr: string | null,
|
||||
registry: Record<string, AudioProvider>
|
||||
registry: Record<string, AudioProvider>,
|
||||
dynamicProviders?: AudioProvider[]
|
||||
): { provider: string | null; model: string | null } {
|
||||
if (!modelStr) return { provider: null, model: null };
|
||||
|
||||
for (const [providerId, config] of Object.entries(registry)) {
|
||||
// Phase 1: prefix match in hardcoded registry
|
||||
for (const [providerId] of Object.entries(registry)) {
|
||||
if (modelStr.startsWith(providerId + "/")) {
|
||||
return { provider: providerId, model: modelStr.slice(providerId.length + 1) };
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: bare model lookup in hardcoded registry
|
||||
for (const [providerId, config] of Object.entries(registry)) {
|
||||
if (config.models.some((m) => m.id === modelStr)) {
|
||||
return { provider: providerId, model: modelStr };
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: prefix match in dynamic providers (provider_nodes)
|
||||
if (dynamicProviders) {
|
||||
for (const dp of dynamicProviders) {
|
||||
if (modelStr.startsWith(dp.id + "/")) {
|
||||
return { provider: dp.id, model: modelStr.slice(dp.id.length + 1) };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { provider: null, model: modelStr };
|
||||
}
|
||||
|
||||
export function parseTranscriptionModel(modelStr: string | null) {
|
||||
return parseAudioModel(modelStr, AUDIO_TRANSCRIPTION_PROVIDERS);
|
||||
export function parseTranscriptionModel(
|
||||
modelStr: string | null,
|
||||
dynamicProviders?: AudioProvider[]
|
||||
) {
|
||||
return parseAudioModel(modelStr, AUDIO_TRANSCRIPTION_PROVIDERS, dynamicProviders);
|
||||
}
|
||||
|
||||
export function parseSpeechModel(modelStr: string | null) {
|
||||
return parseAudioModel(modelStr, AUDIO_SPEECH_PROVIDERS);
|
||||
export function parseSpeechModel(modelStr: string | null, dynamicProviders?: AudioProvider[]) {
|
||||
return parseAudioModel(modelStr, AUDIO_SPEECH_PROVIDERS, dynamicProviders);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -135,6 +135,7 @@ export const COOLDOWN_MS = {
|
||||
unauthorized: 2 * 60 * 1000, // 401 → 2 min
|
||||
paymentRequired: 2 * 60 * 1000, // 402/403 → 2 min
|
||||
notFound: 2 * 60 * 1000, // 404 → 2 minutes
|
||||
notFoundLocal: 5 * 1000, // 404 on local provider → 5s model-only lockout (connection stays active)
|
||||
transientInitial: 5 * 1000, // 408/500/502/503/504 first hit → 5s (backoff from here)
|
||||
transientMax: 60 * 1000, // 502/503/504 backoff ceiling → 60s
|
||||
transient: 5 * 1000, // Legacy alias → points to transientInitial
|
||||
@@ -162,6 +163,16 @@ export const PROVIDER_PROFILES = {
|
||||
circuitBreakerThreshold: 5, // More tolerant (occasional 502 is normal)
|
||||
circuitBreakerReset: 30000, // 30s reset
|
||||
},
|
||||
// Local providers (localhost inference backends like Ollama, LM Studio, oMLX).
|
||||
// Not yet wired into getProviderProfile() — will be used when local provider_nodes
|
||||
// are integrated into the resilience layer. Kept here to avoid a second constants change.
|
||||
local: {
|
||||
transientCooldown: 2000, // 2s (local — very fast recovery)
|
||||
rateLimitCooldown: 5000, // 5s (local — no real rate limits)
|
||||
maxBackoffLevel: 3, // Low ceiling (local either works or doesn't)
|
||||
circuitBreakerThreshold: 2, // Opens fast (if local is down, it's down)
|
||||
circuitBreakerReset: 15000, // 15s reset (check again quickly)
|
||||
},
|
||||
};
|
||||
|
||||
// Default rate limit values for API Key providers (auto-enabled safety net)
|
||||
|
||||
@@ -8,7 +8,43 @@
|
||||
* keyed by provider ID (e.g. "nebius", "openai").
|
||||
*/
|
||||
|
||||
export const EMBEDDING_PROVIDERS = {
|
||||
export interface EmbeddingProvider {
|
||||
id: string;
|
||||
baseUrl: string;
|
||||
authType: string;
|
||||
authHeader: string;
|
||||
models: { id: string; name: string; dimensions?: number }[];
|
||||
}
|
||||
|
||||
export interface EmbeddingProviderNodeRow {
|
||||
prefix: string;
|
||||
name: string;
|
||||
baseUrl: string;
|
||||
apiType?: string;
|
||||
}
|
||||
|
||||
/**
 * Create an EmbeddingProvider config from a provider_node row.
 *
 * The node's prefix becomes the provider id, and "/embeddings" is appended to
 * the node's base URL once any trailing slashes have been removed. The result
 * uses no authentication and starts with an empty model list.
 *
 * Meant for local (localhost) nodes only. This function does not inspect the
 * hostname, so the caller is responsible for filtering beforehand.
 *
 * @param node - provider_node row; `prefix` and `baseUrl` must be non-empty.
 * @returns The provider config derived from the node.
 * @throws Error if `prefix` or `baseUrl` is missing, or if `prefix` contains
 *   "/" or a space (the prefix is later matched as the "prefix/" part of a model string).
 */
export function buildDynamicEmbeddingProvider(node: EmbeddingProviderNodeRow): EmbeddingProvider {
  const { prefix, baseUrl: rawBaseUrl } = node;

  if (!prefix || !rawBaseUrl) {
    throw new Error(`Invalid provider_node: missing prefix or baseUrl`);
  }

  const hasForbiddenChar = ["/", " "].some((ch) => prefix.includes(ch));
  if (hasForbiddenChar) {
    throw new Error(`Invalid provider_node prefix "${prefix}": must not contain / or spaces`);
  }

  // Drop trailing slashes so the endpoint never contains "//embeddings".
  const trimmedBaseUrl = rawBaseUrl.replace(/\/+$/, "");

  return {
    id: prefix,
    baseUrl: `${trimmedBaseUrl}/embeddings`,
    authType: "none",
    authHeader: "none",
    models: [],
  };
}
|
||||
|
||||
export const EMBEDDING_PROVIDERS: Record<string, EmbeddingProvider> = {
|
||||
nebius: {
|
||||
id: "nebius",
|
||||
baseUrl: "https://api.tokenfactory.nebius.com/v1/embeddings",
|
||||
@@ -70,7 +106,7 @@ export const EMBEDDING_PROVIDERS = {
|
||||
/**
|
||||
* Get embedding provider config by ID
|
||||
*/
|
||||
export function getEmbeddingProvider(providerId) {
|
||||
export function getEmbeddingProvider(providerId: string): EmbeddingProvider | null {
|
||||
return EMBEDDING_PROVIDERS[providerId] || null;
|
||||
}
|
||||
|
||||
@@ -78,26 +114,36 @@ export function getEmbeddingProvider(providerId) {
|
||||
* Parse embedding model string (format: "provider/model" or just "model")
|
||||
* Returns { provider, model }
|
||||
*/
|
||||
export function parseEmbeddingModel(modelStr) {
|
||||
export function parseEmbeddingModel(
|
||||
modelStr: string | null,
|
||||
dynamicProviders?: EmbeddingProvider[]
|
||||
): { provider: string | null; model: string | null } {
|
||||
if (!modelStr) return { provider: null, model: null };
|
||||
|
||||
// Check for "provider/model" format
|
||||
const slashIdx = modelStr.indexOf("/");
|
||||
if (slashIdx > 0) {
|
||||
// Handle nested model IDs like "nebius/Qwen/Qwen3-Embedding-8B"
|
||||
// Try each provider prefix
|
||||
for (const [providerId, config] of Object.entries(EMBEDDING_PROVIDERS)) {
|
||||
// Phase 1: Try each hardcoded provider prefix
|
||||
for (const [providerId] of Object.entries(EMBEDDING_PROVIDERS)) {
|
||||
if (modelStr.startsWith(providerId + "/")) {
|
||||
return { provider: providerId, model: modelStr.slice(providerId.length + 1) };
|
||||
}
|
||||
}
|
||||
// Fallback: first segment is provider
|
||||
// Phase 2: Try dynamic provider_nodes prefix
|
||||
if (dynamicProviders) {
|
||||
for (const dp of dynamicProviders) {
|
||||
if (modelStr.startsWith(dp.id + "/")) {
|
||||
return { provider: dp.id, model: modelStr.slice(dp.id.length + 1) };
|
||||
}
|
||||
}
|
||||
}
|
||||
// Phase 3: Fallback — first segment is provider
|
||||
const provider = modelStr.slice(0, slashIdx);
|
||||
const model = modelStr.slice(slashIdx + 1);
|
||||
return { provider, model };
|
||||
}
|
||||
|
||||
// No provider prefix — search all providers for the model
|
||||
// No provider prefix — search hardcoded providers for the model
|
||||
for (const [providerId, config] of Object.entries(EMBEDDING_PROVIDERS)) {
|
||||
if (config.models.some((m) => m.id === modelStr)) {
|
||||
return { provider: providerId, model: modelStr };
|
||||
|
||||
@@ -11,9 +11,23 @@
|
||||
export interface RegistryModel {
|
||||
id: string;
|
||||
name: string;
|
||||
toolCalling?: boolean;
|
||||
targetFormat?: string;
|
||||
unsupportedParams?: readonly string[];
|
||||
}
|
||||
|
||||
// Reasoning models reject temperature, top_p, penalties, logprobs, n.
|
||||
// Frozen to prevent accidental mutation (shared across all model entries).
|
||||
const REASONING_UNSUPPORTED: readonly string[] = Object.freeze([
|
||||
"temperature",
|
||||
"top_p",
|
||||
"frequency_penalty",
|
||||
"presence_penalty",
|
||||
"logprobs",
|
||||
"top_logprobs",
|
||||
"n",
|
||||
]);
|
||||
|
||||
export interface RegistryOAuth {
|
||||
clientIdEnv?: string;
|
||||
clientIdDefault?: string;
|
||||
@@ -101,6 +115,7 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
},
|
||||
models: [
|
||||
{ id: "claude-opus-4-6", name: "Claude Opus 4.6" },
|
||||
{ id: "claude-sonnet-4-6", name: "Claude 4.6 Sonnet" },
|
||||
{ id: "claude-opus-4-5-20251101", name: "Claude 4.5 Opus" },
|
||||
{ id: "claude-sonnet-4-5-20250929", name: "Claude 4.5 Sonnet" },
|
||||
{ id: "claude-haiku-4-5-20251001", name: "Claude 4.5 Haiku" },
|
||||
@@ -127,12 +142,15 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
},
|
||||
models: [
|
||||
{ id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" },
|
||||
{ id: "gemini-3.1-flash", name: "Gemini 3.1 Flash" },
|
||||
{ id: "gemini-3-pro-preview", name: "Gemini 3.0 Pro Preview" },
|
||||
{ id: "gemini-3-flash-preview", name: "Gemini 3.0 Flash Preview" },
|
||||
{ id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" },
|
||||
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
|
||||
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
|
||||
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
|
||||
{ id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
|
||||
{ id: "gemini-2.0-flash", name: "Gemini 2.0 Flash" },
|
||||
{ id: "gemini-2.0-flash-exp", name: "Gemini 2.0 Flash Exp" },
|
||||
{ id: "gemini-1.5-pro", name: "Gemini 1.5 Pro" },
|
||||
{ id: "gemini-1.5-flash", name: "Gemini 1.5 Flash" },
|
||||
],
|
||||
},
|
||||
|
||||
@@ -156,12 +174,14 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
},
|
||||
models: [
|
||||
{ id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" },
|
||||
{ id: "gemini-3.1-flash", name: "Gemini 3.1 Flash" },
|
||||
{ id: "gemini-3-flash-preview", name: "Gemini 3.0 Flash Preview" },
|
||||
{ id: "gemini-3-pro-preview", name: "Gemini 3.0 Pro Preview" },
|
||||
{ id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" },
|
||||
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
|
||||
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
|
||||
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
|
||||
{ id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
|
||||
{ id: "gemini-2.0-flash", name: "Gemini 2.0 Flash" },
|
||||
{ id: "gemini-1.5-pro", name: "Gemini 1.5 Pro" },
|
||||
{ id: "gemini-1.5-flash", name: "Gemini 1.5 Flash" },
|
||||
],
|
||||
},
|
||||
|
||||
@@ -305,10 +325,9 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
models: [
|
||||
{ id: "claude-opus-4-6-thinking", name: "Claude Opus 4.6 Thinking" },
|
||||
{ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6" },
|
||||
{ id: "gemini-3.1-pro-high", name: "Gemini 3.1 Pro High" },
|
||||
{ id: "gemini-3.1-pro-low", name: "Gemini 3.1 Pro Low" },
|
||||
{ id: "gemini-3.1-flash", name: "Gemini 3.1 Flash" },
|
||||
{ id: "gemini-3-flash", name: "Gemini 3.0 Flash" },
|
||||
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
|
||||
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
|
||||
{ id: "gemini-2.0-flash", name: "Gemini 2.0 Flash" },
|
||||
{ id: "gpt-oss-120b-medium", name: "GPT OSS 120B Medium" },
|
||||
],
|
||||
},
|
||||
@@ -356,8 +375,7 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
{ id: "claude-sonnet-4", name: "Claude Sonnet 4" },
|
||||
{ id: "claude-sonnet-4.5", name: "Claude Sonnet 4.5" },
|
||||
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
|
||||
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
|
||||
{ id: "gemini-3-pro-preview", name: "Gemini 3 Pro Preview" },
|
||||
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
|
||||
{ id: "grok-code-fast-1", name: "Grok Code Fast 1" },
|
||||
{ id: "oswe-vscode-prime", name: "Raptor Mini" },
|
||||
],
|
||||
@@ -429,8 +447,11 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
{ id: "gpt-4o", name: "GPT-4o" },
|
||||
{ id: "gpt-4o-mini", name: "GPT-4o Mini" },
|
||||
{ id: "gpt-4-turbo", name: "GPT-4 Turbo" },
|
||||
{ id: "o1", name: "O1" },
|
||||
{ id: "o1-mini", name: "O1 Mini" },
|
||||
{ id: "o1", name: "O1", unsupportedParams: REASONING_UNSUPPORTED },
|
||||
{ id: "o1-mini", name: "O1 Mini", unsupportedParams: REASONING_UNSUPPORTED },
|
||||
{ id: "o1-pro", name: "O1 Pro", unsupportedParams: REASONING_UNSUPPORTED },
|
||||
{ id: "o3", name: "O3", unsupportedParams: REASONING_UNSUPPORTED },
|
||||
{ id: "o3-mini", name: "O3 Mini", unsupportedParams: REASONING_UNSUPPORTED },
|
||||
],
|
||||
},
|
||||
|
||||
@@ -447,8 +468,13 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
"Anthropic-Version": "2023-06-01",
|
||||
},
|
||||
models: [
|
||||
{ id: "claude-haiku-4.5", name: "Claude Haiku 4.5" },
|
||||
{ id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4" },
|
||||
{ id: "claude-sonnet-4-6-20251031", name: "Claude Sonnet 4.6 (Dated)" },
|
||||
{ id: "claude-sonnet-4.6", name: "Claude Sonnet 4.6" },
|
||||
{ id: "claude-opus-4-20250514", name: "Claude Opus 4" },
|
||||
{ id: "claude-opus-4-6-20251031", name: "Claude Opus 4.6 (Dated)" },
|
||||
{ id: "claude-opus-4.6", name: "Claude Opus 4.6" },
|
||||
{ id: "claude-3-5-sonnet-20241022", name: "Claude 3.5 Sonnet" },
|
||||
],
|
||||
},
|
||||
@@ -482,6 +508,8 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
"Anthropic-Beta": "claude-code-20250219,interleaved-thinking-2025-05-14",
|
||||
},
|
||||
models: [
|
||||
{ id: "glm-5", name: "GLM 5" },
|
||||
{ id: "glm-5-turbo", name: "GLM 5 Turbo" },
|
||||
{ id: "glm-4.7-flash", name: "GLM 4.7 Flash" },
|
||||
{ id: "glm-4.7", name: "GLM 4.7" },
|
||||
{ id: "glm-4.6v", name: "GLM 4.6V (Vision)" },
|
||||
@@ -493,6 +521,25 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
],
|
||||
},
|
||||
|
||||
zai: {
|
||||
id: "zai",
|
||||
alias: "zai",
|
||||
format: "claude",
|
||||
executor: "default",
|
||||
baseUrl: "https://api.z.ai/api/anthropic/v1/messages",
|
||||
urlSuffix: "?beta=true",
|
||||
authType: "apikey",
|
||||
authHeader: "x-api-key",
|
||||
headers: {
|
||||
"Anthropic-Version": "2023-06-01",
|
||||
"Anthropic-Beta": "claude-code-20250219,interleaved-thinking-2025-05-14",
|
||||
},
|
||||
models: [
|
||||
{ id: "glm-5", name: "GLM 5" },
|
||||
{ id: "glm-5-turbo", name: "GLM 5 Turbo" },
|
||||
],
|
||||
},
|
||||
|
||||
kimi: {
|
||||
id: "kimi",
|
||||
alias: "kimi",
|
||||
@@ -624,7 +671,11 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
"Anthropic-Version": "2023-06-01",
|
||||
"Anthropic-Beta": "claude-code-20250219,interleaved-thinking-2025-05-14",
|
||||
},
|
||||
models: [{ id: "MiniMax-M2.1", name: "MiniMax M2.1" }],
|
||||
models: [
|
||||
{ id: "minimax-m2.5", name: "MiniMax M2.5" },
|
||||
{ id: "MiniMax-M2.5", name: "MiniMax M2.5 (Legacy Alias)" },
|
||||
{ id: "MiniMax-M2.1", name: "MiniMax M2.1" },
|
||||
],
|
||||
},
|
||||
|
||||
"minimax-cn": {
|
||||
@@ -642,6 +693,8 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
},
|
||||
models: [
|
||||
// Keep parity with minimax to ensure model discovery works for minimax-cn connections.
|
||||
{ id: "minimax-m2.5", name: "MiniMax M2.5" },
|
||||
{ id: "MiniMax-M2.5", name: "MiniMax M2.5 (Legacy Alias)" },
|
||||
{ id: "MiniMax-M2.1", name: "MiniMax M2.1" },
|
||||
],
|
||||
},
|
||||
@@ -704,10 +757,14 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
authType: "apikey",
|
||||
authHeader: "bearer",
|
||||
models: [
|
||||
{ id: "grok-4", name: "Grok 4" },
|
||||
{ id: "grok-4-fast-non-reasoning", name: "Grok 4 Fast" },
|
||||
{ id: "grok-4-fast-reasoning", name: "Grok 4 Fast Reasoning" },
|
||||
{ id: "grok-code-fast-1", name: "Grok Code Fast" },
|
||||
{ id: "grok-4-1-fast-non-reasoning", name: "Grok 4.1 Fast" },
|
||||
{ id: "grok-4-1-fast-reasoning", name: "Grok 4.1 Fast Reasoning" },
|
||||
{ id: "grok-4-0709", name: "Grok 4 (0709)" },
|
||||
{ id: "grok-4", name: "Grok 4" },
|
||||
{ id: "grok-3", name: "Grok 3" },
|
||||
{ id: "grok-3-mini", name: "Grok 3 Mini" },
|
||||
],
|
||||
},
|
||||
|
||||
@@ -836,12 +893,17 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
authType: "apikey",
|
||||
authHeader: "bearer",
|
||||
models: [
|
||||
{ id: "gpt-oss-120b", name: "GPT OSS 120B", toolCalling: false },
|
||||
{ id: "openai/gpt-oss-120b", name: "GPT OSS 120B (OpenAI Prefix)", toolCalling: false },
|
||||
{ id: "meta/llama-3.3-70b-instruct", name: "Llama 3.3 70B" },
|
||||
{ id: "nvidia/llama-3.3-70b-instruct", name: "Llama 3.3 70B (NVIDIA Prefix)" },
|
||||
{ id: "meta/llama-4-maverick-17b-128e-instruct", name: "Llama 4 Maverick" },
|
||||
{ id: "moonshotai/kimi-k2.5", name: "Kimi K2.5" },
|
||||
{ id: "z-ai/glm4.7", name: "GLM 4.7" },
|
||||
{ id: "deepseek-ai/deepseek-v3.2", name: "DeepSeek V3.2" },
|
||||
{ id: "nvidia/llama-3.3-70b-instruct", name: "Llama 3.3 70B" },
|
||||
{ id: "meta/llama-4-maverick-17b-128e-instruct", name: "Llama 4 Maverick" },
|
||||
{ id: "deepseek/deepseek-r1", name: "DeepSeek R1" },
|
||||
{ id: "nvidia/llama-3.1-70b-instruct", name: "Llama 3.1 70B" },
|
||||
{ id: "nvidia/llama-3.1-405b-instruct", name: "Llama 3.1 405B" },
|
||||
],
|
||||
},
|
||||
|
||||
@@ -919,6 +981,46 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
],
|
||||
},
|
||||
|
||||
synthetic: {
|
||||
id: "synthetic",
|
||||
alias: "synthetic",
|
||||
format: "openai",
|
||||
executor: "default",
|
||||
baseUrl: "https://api.synthetic.new/openai/v1/chat/completions",
|
||||
modelsUrl: "https://api.synthetic.new/openai/v1/models",
|
||||
authType: "apikey",
|
||||
authHeader: "bearer",
|
||||
models: [
|
||||
{ id: "hf:nvidia/Kimi-K2.5-NVFP4", name: "Kimi K2.5 (NVFP4)" },
|
||||
{ id: "hf:MiniMaxAI/MiniMax-M2.5", name: "MiniMax M2.5" },
|
||||
{ id: "hf:zai-org/GLM-4.7-Flash", name: "GLM 4.7 Flash" },
|
||||
{ id: "hf:zai-org/GLM-4.7", name: "GLM 4.7" },
|
||||
{ id: "hf:moonshotai/Kimi-K2.5", name: "Kimi K2.5" },
|
||||
{ id: "hf:deepseek-ai/DeepSeek-V3.2", name: "DeepSeek V3.2" },
|
||||
],
|
||||
passthroughModels: true,
|
||||
},
|
||||
|
||||
"kilo-gateway": {
|
||||
id: "kilo-gateway",
|
||||
alias: "kg",
|
||||
format: "openai",
|
||||
executor: "default",
|
||||
baseUrl: "https://api.kilo.ai/api/gateway/chat/completions",
|
||||
modelsUrl: "https://api.kilo.ai/api/gateway/models",
|
||||
authType: "apikey",
|
||||
authHeader: "bearer",
|
||||
models: [
|
||||
{ id: "kilo-auto/frontier", name: "Kilo Auto Frontier" },
|
||||
{ id: "kilo-auto/balanced", name: "Kilo Auto Balanced" },
|
||||
{ id: "kilo-auto/free", name: "Kilo Auto Free" },
|
||||
{ id: "nvidia/nemotron-3-super-120b-a12b:free", name: "Nemotron 3 Super 120B (Free)" },
|
||||
{ id: "minimax/minimax-m2.5:free", name: "MiniMax M2.5 (Free)" },
|
||||
{ id: "arcee-ai/trinity-large-preview:free", name: "Trinity Large Preview (Free)" },
|
||||
],
|
||||
passthroughModels: true,
|
||||
},
|
||||
|
||||
vertex: {
|
||||
id: "vertex",
|
||||
alias: "vertex",
|
||||
@@ -1022,6 +1124,38 @@ export function generateAliasMap(): Record<string, string> {
|
||||
return map;
|
||||
}
|
||||
|
||||
// ── Local Provider Detection ──────────────────────────────────────────────
|
||||
|
||||
// Hostnames treated as local inference backends.
// Evaluated once at module load time — process restart required for env var changes.
// Operators may append extra names through the comma-separated LOCAL_HOSTNAMES env var.
// Env entries are lower-cased because the URL parser lower-cases the hostname of
// http(s) URLs; a mixed-case entry (e.g. "OMLX") would otherwise never match
// `url.hostname`. "[::1]" is the bracketed form `URL.hostname` reports for the
// IPv6 loopback address.
const LOCAL_HOSTNAMES = new Set([
  "localhost",
  "127.0.0.1",
  "::1",
  "[::1]",
  ...(typeof process !== "undefined" && process.env.LOCAL_HOSTNAMES
    ? process.env.LOCAL_HOSTNAMES.split(",")
        .map((h) => h.trim().toLowerCase())
        .filter(Boolean)
    : []),
]);
|
||||
|
||||
/**
 * Report whether a base URL targets a host listed in LOCAL_HOSTNAMES.
 *
 * Per the registry notes, the result is used to apply shorter, model-only 404
 * cooldowns and to pick health check targets. The host list can be extended
 * with the comma-separated LOCAL_HOSTNAMES env var, e.g. for Docker hostnames
 * (LOCAL_HOSTNAMES=omlx,mlx-audio).
 *
 * @param baseUrl - provider base URL; may be absent.
 * @returns true when the URL parses and its hostname is in LOCAL_HOSTNAMES;
 *   false for missing, empty, or unparseable input.
 */
export function isLocalProvider(baseUrl?: string | null): boolean {
  if (!baseUrl) return false;

  let hostname: string;
  try {
    hostname = new URL(baseUrl).hostname;
  } catch {
    // Not a valid absolute URL — treat as non-local instead of failing the caller.
    return false;
  }

  return LOCAL_HOSTNAMES.has(hostname);
}
|
||||
|
||||
// ── Registry Lookup Helpers ───────────────────────────────────────────────
|
||||
|
||||
const _byAlias = new Map<string, RegistryEntry>();
|
||||
@@ -1041,6 +1175,43 @@ export function getRegisteredProviders(): string[] {
|
||||
return Object.keys(REGISTRY);
|
||||
}
|
||||
|
||||
// Lookup table: model id → unsupportedParams list.
// Filled once at module load by walking every REGISTRY entry in order, so later
// lookups are a single Map access instead of a scan over all providers' models.
// When several providers list the same model id, the first entry that declares
// unsupportedParams wins.
const _unsupportedParamsMap = new Map<string, readonly string[]>();
Object.values(REGISTRY)
  .flatMap((entry) => entry.models)
  .forEach((model) => {
    if (!model.unsupportedParams) return;
    if (_unsupportedParamsMap.has(model.id)) return;
    _unsupportedParamsMap.set(model.id, model.unsupportedParams);
  });
|
||||
|
||||
/**
 * Resolve the request parameters a model does not accept.
 *
 * Lookup order:
 *   1. exact model id in the given provider's own registry entry;
 *   2. exact model id in the precomputed cross-provider map;
 *   3. for ids containing "/", the last path segment in that same map
 *      (e.g. "openai/o3" is looked up as "o3").
 *
 * @param provider - provider id passed to getRegistryEntry().
 * @param modelId - model id, optionally carrying a "prefix/" part.
 * @returns The matching unsupportedParams list, or an empty array when no
 *   restriction is registered.
 */
export function getUnsupportedParams(provider: string, modelId: string): readonly string[] {
  // The provider's own declaration takes priority over any other provider's.
  const ownModel = getRegistryEntry(provider)?.models.find((m) => m.id === modelId);
  const ownParams = ownModel?.unsupportedParams;
  if (ownParams) return ownParams;

  // Full id first, then the bare id after the last "/" (if there is one).
  const candidateIds = [modelId];
  if (modelId.includes("/")) {
    candidateIds.push(modelId.split("/").pop() || "");
  }

  for (const candidateId of candidateIds) {
    const params = _unsupportedParamsMap.get(candidateId);
    if (params) return params;
  }

  return [];
}
|
||||
|
||||
/**
|
||||
* Get provider category: "oauth" or "apikey"
|
||||
* Used by the resilience layer to apply different cooldown/backoff profiles.
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Search Provider Registry
|
||||
*
|
||||
* Defines providers that support the /v1/search endpoint.
|
||||
* Unlike LLM/embedding providers, search providers don't have "models" —
|
||||
* a provider IS the model (Serper = Google SERP, Brave = Brave index).
|
||||
*
|
||||
* API keys are stored in the same provider credentials system,
|
||||
* keyed by provider ID (e.g. "serper-search", "brave-search").
|
||||
* perplexity-search reuses credentials from the "perplexity" chat provider.
|
||||
*/
|
||||
|
||||
/**
 * Static configuration of one search provider in SEARCH_PROVIDERS.
 *
 * - id / name: registry key and display name.
 * - baseUrl / method: endpoint and HTTP verb for the upstream request.
 * - authType / authHeader: credential kind and the header scheme used to send
 *   it (registered values include "x-api-key", "x-subscription-token", "bearer").
 * - costPerQuery: compared by selectProvider() to pick the cheapest provider.
 *   NOTE(review): currency/unit is not stated here — presumably USD; confirm.
 * - freeMonthlyQuota: NOTE(review): presumably free queries per month; confirm.
 * - searchTypes: search kinds offered (registered values are "web" and "news").
 * - defaultMaxResults / maxMaxResults: default and upper bound for result count.
 * - timeoutMs / cacheTTLMs: durations in milliseconds, per the field names.
 */
export interface SearchProviderConfig {
|
||||
id: string;
|
||||
name: string;
|
||||
baseUrl: string;
|
||||
method: "GET" | "POST";
|
||||
authType: "apikey";
|
||||
authHeader: string;
|
||||
costPerQuery: number;
|
||||
freeMonthlyQuota: number;
|
||||
searchTypes: string[];
|
||||
defaultMaxResults: number;
|
||||
maxMaxResults: number;
|
||||
timeoutMs: number;
|
||||
cacheTTLMs: number;
|
||||
}
|
||||
|
||||
export const SEARCH_PROVIDERS: Record<string, SearchProviderConfig> = {
|
||||
"serper-search": {
|
||||
id: "serper-search",
|
||||
name: "Serper Search",
|
||||
baseUrl: "https://google.serper.dev",
|
||||
method: "POST",
|
||||
authType: "apikey",
|
||||
authHeader: "x-api-key",
|
||||
costPerQuery: 0.001,
|
||||
freeMonthlyQuota: 2500,
|
||||
searchTypes: ["web", "news"],
|
||||
defaultMaxResults: 5,
|
||||
maxMaxResults: 100,
|
||||
timeoutMs: 10_000,
|
||||
cacheTTLMs: 5 * 60 * 1000,
|
||||
},
|
||||
|
||||
"brave-search": {
|
||||
id: "brave-search",
|
||||
name: "Brave Search",
|
||||
baseUrl: "https://api.search.brave.com/res/v1",
|
||||
method: "GET",
|
||||
authType: "apikey",
|
||||
authHeader: "x-subscription-token",
|
||||
costPerQuery: 0.005,
|
||||
freeMonthlyQuota: 1000,
|
||||
searchTypes: ["web", "news"],
|
||||
defaultMaxResults: 5,
|
||||
maxMaxResults: 20,
|
||||
timeoutMs: 10_000,
|
||||
cacheTTLMs: 5 * 60 * 1000,
|
||||
},
|
||||
|
||||
"perplexity-search": {
|
||||
id: "perplexity-search",
|
||||
name: "Perplexity Search",
|
||||
baseUrl: "https://api.perplexity.ai/search",
|
||||
method: "POST",
|
||||
authType: "apikey",
|
||||
authHeader: "bearer",
|
||||
costPerQuery: 0.005,
|
||||
freeMonthlyQuota: 0,
|
||||
searchTypes: ["web"],
|
||||
defaultMaxResults: 5,
|
||||
maxMaxResults: 20,
|
||||
timeoutMs: 10_000,
|
||||
cacheTTLMs: 5 * 60 * 1000,
|
||||
},
|
||||
|
||||
"exa-search": {
|
||||
id: "exa-search",
|
||||
name: "Exa Search",
|
||||
baseUrl: "https://api.exa.ai/search",
|
||||
method: "POST",
|
||||
authType: "apikey",
|
||||
authHeader: "x-api-key",
|
||||
costPerQuery: 0.007,
|
||||
freeMonthlyQuota: 1000,
|
||||
searchTypes: ["web", "news"],
|
||||
defaultMaxResults: 5,
|
||||
maxMaxResults: 100,
|
||||
timeoutMs: 10_000,
|
||||
cacheTTLMs: 5 * 60 * 1000,
|
||||
},
|
||||
|
||||
"tavily-search": {
|
||||
id: "tavily-search",
|
||||
name: "Tavily Search",
|
||||
baseUrl: "https://api.tavily.com/search",
|
||||
method: "POST",
|
||||
authType: "apikey",
|
||||
authHeader: "bearer",
|
||||
costPerQuery: 0.008,
|
||||
freeMonthlyQuota: 1000,
|
||||
searchTypes: ["web", "news"],
|
||||
defaultMaxResults: 5,
|
||||
maxMaxResults: 20,
|
||||
timeoutMs: 10_000,
|
||||
cacheTTLMs: 5 * 60 * 1000,
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Credential fallback mapping — search providers that can reuse credentials
|
||||
* from a related provider (e.g., perplexity-search uses the same API key as perplexity chat).
|
||||
*/
|
||||
export const SEARCH_CREDENTIAL_FALLBACKS: Record<string, string> = {
|
||||
"perplexity-search": "perplexity",
|
||||
};
|
||||
|
||||
/**
 * Get search provider config by ID.
 *
 * Only own keys of SEARCH_PROVIDERS are considered. A plain property read would
 * also resolve inherited Object.prototype members, so an ID such as
 * "constructor" or "toString" would return a function instead of a config.
 *
 * @param providerId - registry key, e.g. "serper-search".
 * @returns The matching config, or null when the ID is not registered.
 */
export function getSearchProvider(providerId: string): SearchProviderConfig | null {
  if (!Object.prototype.hasOwnProperty.call(SEARCH_PROVIDERS, providerId)) {
    return null;
  }
  return SEARCH_PROVIDERS[providerId] || null;
}
|
||||
|
||||
/**
 * List every registered search provider in summary form.
 *
 * @returns One `{ id, name, searchTypes }` object per SEARCH_PROVIDERS entry, in
 *   registry order. `searchTypes` is the registry's own array, not a copy.
 */
export function getAllSearchProviders(): Array<{
  id: string;
  name: string;
  searchTypes: string[];
}> {
  const summaries: Array<{ id: string; name: string; searchTypes: string[] }> = [];
  for (const { id, name, searchTypes } of Object.values(SEARCH_PROVIDERS)) {
    summaries.push({ id, name, searchTypes });
  }
  return summaries;
}
|
||||
|
||||
/**
 * Select a search provider.
 *
 * With an explicit provider ID, return that provider's config, or null when the
 * ID is not registered. Only own keys of SEARCH_PROVIDERS are considered, so IDs
 * that collide with inherited Object.prototype members ("constructor",
 * "toString", ...) yield null rather than a non-config value.
 *
 * Without an explicit ID, return the registered provider with the lowest
 * costPerQuery; on a tie the provider registered first wins. No credential or
 * quota check is done here — "cheapest" is decided by static config alone.
 *
 * @param explicitProvider - optional registry key requested by the caller.
 * @returns The selected config, or null if the explicit ID is unknown or the
 *   registry is empty.
 */
export function selectProvider(explicitProvider?: string): SearchProviderConfig | null {
  if (explicitProvider) {
    if (!Object.prototype.hasOwnProperty.call(SEARCH_PROVIDERS, explicitProvider)) {
      return null;
    }
    return SEARCH_PROVIDERS[explicitProvider] || null;
  }

  const providers = Object.values(SEARCH_PROVIDERS);
  if (providers.length === 0) return null;

  // Strict "<" keeps the earlier provider when two costs are equal.
  return providers.reduce((cheapest, p) => (p.costPerQuery < cheapest.costPerQuery ? p : cheapest));
}
|
||||
@@ -381,7 +381,12 @@ async function handleTortoiseSpeech(providerConfig, body) {
|
||||
* @returns {Response}
|
||||
*/
|
||||
/** @returns {Promise<unknown>} */
|
||||
export async function handleAudioSpeech({ body, credentials }) {
|
||||
export async function handleAudioSpeech({
|
||||
body,
|
||||
credentials,
|
||||
resolvedProvider = null,
|
||||
resolvedModel = null,
|
||||
}) {
|
||||
if (!body.model) {
|
||||
return errorResponse(400, "model is required");
|
||||
}
|
||||
@@ -389,8 +394,15 @@ export async function handleAudioSpeech({ body, credentials }) {
|
||||
return errorResponse(400, "input is required");
|
||||
}
|
||||
|
||||
const { provider: providerId, model: modelId } = parseSpeechModel(body.model);
|
||||
const providerConfig = providerId ? getSpeechProvider(providerId) : null;
|
||||
// Use pre-resolved provider/model from route handler if available (supports dynamic provider_nodes).
|
||||
// Falls back to hardcoded registry lookup for backward compatibility.
|
||||
let providerConfig = resolvedProvider;
|
||||
let modelId = resolvedModel;
|
||||
if (!providerConfig) {
|
||||
const parsed = parseSpeechModel(body.model);
|
||||
providerConfig = parsed.provider ? getSpeechProvider(parsed.provider) : null;
|
||||
modelId = parsed.model;
|
||||
}
|
||||
|
||||
if (!providerConfig) {
|
||||
return errorResponse(
|
||||
@@ -403,7 +415,7 @@ export async function handleAudioSpeech({ body, credentials }) {
|
||||
const token =
|
||||
providerConfig.authType === "none" ? null : credentials?.apiKey || credentials?.accessToken;
|
||||
if (providerConfig.authType !== "none" && !token) {
|
||||
return errorResponse(401, `No credentials for speech provider: ${providerId}`);
|
||||
return errorResponse(401, `No credentials for speech provider: ${providerConfig.id}`);
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
@@ -13,7 +13,11 @@ import { getCorsOrigin } from "../utils/cors.ts";
|
||||
* - HuggingFace Inference: POST raw binary to /models/{model_id}
|
||||
*/
|
||||
|
||||
import { getTranscriptionProvider, parseTranscriptionModel } from "../config/audioRegistry.ts";
|
||||
import {
|
||||
getTranscriptionProvider,
|
||||
parseTranscriptionModel,
|
||||
type AudioProvider,
|
||||
} from "../config/audioRegistry.ts";
|
||||
import { buildAuthHeaders } from "../config/registryUtils.ts";
|
||||
import { errorResponse } from "../utils/error.ts";
|
||||
|
||||
@@ -235,9 +239,13 @@ async function handleHuggingFaceTranscription(providerConfig, file, modelId, tok
|
||||
export async function handleAudioTranscription({
|
||||
formData,
|
||||
credentials,
|
||||
resolvedProvider = null,
|
||||
resolvedModel = null,
|
||||
}: {
|
||||
formData: FormData;
|
||||
credentials?: TranscriptionCredentials | null;
|
||||
resolvedProvider?: AudioProvider | null;
|
||||
resolvedModel?: string | null;
|
||||
}): Promise<Response> {
|
||||
const model = formData.get("model");
|
||||
if (typeof model !== "string" || !model) {
|
||||
@@ -250,8 +258,14 @@ export async function handleAudioTranscription({
|
||||
}
|
||||
const file = fileEntry as Blob & { name?: unknown };
|
||||
|
||||
const { provider: providerId, model: modelId } = parseTranscriptionModel(model);
|
||||
const providerConfig = providerId ? getTranscriptionProvider(providerId) : null;
|
||||
// Use pre-resolved provider/model from route handler if available (supports dynamic provider_nodes).
|
||||
let providerConfig = resolvedProvider;
|
||||
let modelId = resolvedModel;
|
||||
if (!providerConfig) {
|
||||
const parsed = parseTranscriptionModel(model);
|
||||
providerConfig = parsed.provider ? getTranscriptionProvider(parsed.provider) : null;
|
||||
modelId = parsed.model;
|
||||
}
|
||||
|
||||
if (!providerConfig) {
|
||||
return errorResponse(
|
||||
@@ -264,7 +278,7 @@ export async function handleAudioTranscription({
|
||||
const token =
|
||||
providerConfig.authType === "none" ? null : credentials?.apiKey || credentials?.accessToken;
|
||||
if (providerConfig.authType !== "none" && !token) {
|
||||
return errorResponse(401, `No credentials for transcription provider: ${providerId}`);
|
||||
return errorResponse(401, `No credentials for transcription provider: ${providerConfig.id}`);
|
||||
}
|
||||
|
||||
// Route to provider-specific handler
|
||||
|
||||
@@ -13,6 +13,7 @@ import { refreshWithRetry } from "../services/tokenRefresh.ts";
|
||||
import { createRequestLogger } from "../utils/requestLogger.ts";
|
||||
import { getModelTargetFormat, PROVIDER_ID_TO_ALIAS } from "../config/providerModels.ts";
|
||||
import { resolveModelAlias } from "../services/modelDeprecation.ts";
|
||||
import { getUnsupportedParams } from "../config/providerRegistry.ts";
|
||||
import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.ts";
|
||||
import { HTTP_STATUS } from "../config/constants.ts";
|
||||
import { handleBypassRequest } from "../utils/bypassHandler.ts";
|
||||
@@ -41,6 +42,12 @@ import {
|
||||
import { getIdempotencyKey, checkIdempotency, saveIdempotency } from "@/lib/idempotencyLayer";
|
||||
import { createProgressTransform, wantsProgress } from "../utils/progressTracker.ts";
|
||||
import { isModelUnavailableError, getNextFamilyFallback } from "../services/modelFamilyFallback.ts";
|
||||
import { computeRequestHash, deduplicate, shouldDeduplicate } from "../services/requestDedup.ts";
|
||||
import {
|
||||
shouldUseFallback,
|
||||
isFallbackDecision,
|
||||
EMERGENCY_FALLBACK_CONFIG,
|
||||
} from "../services/emergencyFallback.ts";
|
||||
|
||||
export function shouldUseNativeCodexPassthrough({
|
||||
provider,
|
||||
@@ -53,7 +60,9 @@ export function shouldUseNativeCodexPassthrough({
|
||||
}): boolean {
|
||||
if (provider !== "codex") return false;
|
||||
if (sourceFormat !== FORMATS.OPENAI_RESPONSES) return false;
|
||||
return String(endpointPath || "").toLowerCase().endsWith("/responses");
|
||||
return String(endpointPath || "")
|
||||
.toLowerCase()
|
||||
.endsWith("/responses");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -86,6 +95,22 @@ export async function handleChatCore({
|
||||
}) {
|
||||
const { provider, model, extendedContext } = modelInfo;
|
||||
const startTime = Date.now();
|
||||
const persistFailureUsage = (statusCode: number, errorCode?: string | null) => {
|
||||
saveRequestUsage({
|
||||
provider: provider || "unknown",
|
||||
model: model || "unknown",
|
||||
tokens: { input: 0, output: 0, cacheRead: 0, cacheCreation: 0, reasoning: 0 },
|
||||
status: String(statusCode),
|
||||
success: false,
|
||||
latencyMs: Date.now() - startTime,
|
||||
timeToFirstTokenMs: 0,
|
||||
errorCode: errorCode || String(statusCode),
|
||||
timestamp: new Date().toISOString(),
|
||||
connectionId: connectionId || undefined,
|
||||
apiKeyId: apiKeyInfo?.id || undefined,
|
||||
apiKeyName: apiKeyInfo?.name || undefined,
|
||||
}).catch(() => {});
|
||||
};
|
||||
|
||||
// ── Phase 9.2: Idempotency check ──
|
||||
const idempotencyKey = getIdempotencyKey(clientRawRequest?.headers);
|
||||
@@ -182,10 +207,17 @@ export async function handleChatCore({
|
||||
|
||||
// Translate request (pass reqLogger for intermediate logging)
|
||||
let translatedBody = body;
|
||||
const isClaudePassthrough = sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.CLAUDE;
|
||||
try {
|
||||
if (nativeCodexPassthrough) {
|
||||
translatedBody = { ...body, _nativeCodexPassthrough: true };
|
||||
log?.debug?.("FORMAT", "native codex passthrough enabled");
|
||||
} else if (isClaudePassthrough) {
|
||||
// Claude-to-Claude passthrough: forward body completely untouched.
|
||||
// No translation, no field stripping, no thinking normalization.
|
||||
// We are just a gateway -- do not interfere with the request in the slightest.
|
||||
translatedBody = { ...body };
|
||||
log?.debug?.("FORMAT", "claude->claude passthrough -- forwarding untouched");
|
||||
} else {
|
||||
translatedBody = { ...body };
|
||||
|
||||
@@ -230,6 +262,55 @@ export async function handleChatCore({
|
||||
});
|
||||
}
|
||||
|
||||
// Strip empty text content blocks from messages.
|
||||
// Anthropic API rejects {"type":"text","text":""} with 400 "text content blocks must be non-empty".
|
||||
// Some clients (LiteLLM passthrough, @ai-sdk/anthropic) may forward these empty blocks as-is.
|
||||
if (Array.isArray(translatedBody.messages)) {
|
||||
for (const msg of translatedBody.messages) {
|
||||
if (Array.isArray(msg.content)) {
|
||||
msg.content = msg.content.filter(
|
||||
(block: Record<string, unknown>) =>
|
||||
block.type !== "text" || (typeof block.text === "string" && block.text.length > 0)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── #409: Normalize unsupported content part types ──
|
||||
// Cursor and other clients send {type:"file"} when attaching .md or other files.
|
||||
// Providers (Copilot, OpenAI) only accept "text" and "image_url" in content arrays.
|
||||
// Convert: file → text (extract content), drop unrecognized types with a warning.
|
||||
if (Array.isArray(translatedBody.messages)) {
|
||||
for (const msg of translatedBody.messages) {
|
||||
if (msg.role === "user" && Array.isArray(msg.content)) {
|
||||
msg.content = (msg.content as Record<string, unknown>[]).flatMap(
|
||||
(block: Record<string, unknown>) => {
|
||||
if (block.type === "text" || block.type === "image_url" || block.type === "image") {
|
||||
return [block];
|
||||
}
|
||||
// file / document → extract text content
|
||||
if (block.type === "file" || block.type === "document") {
|
||||
const fileContent =
|
||||
(block.file as Record<string, unknown>)?.content ??
|
||||
(block.file as Record<string, unknown>)?.text ??
|
||||
block.content ??
|
||||
block.text;
|
||||
const fileName =
|
||||
(block.file as Record<string, unknown>)?.name ?? block.name ?? "attachment";
|
||||
if (typeof fileContent === "string" && fileContent.length > 0) {
|
||||
return [{ type: "text", text: `[${fileName}]\n${fileContent}` }];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
// Unknown types: drop silently
|
||||
log?.debug?.("CONTENT", `Dropped unsupported content part type="${block.type}"`);
|
||||
return [];
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
translatedBody = translateRequest(
|
||||
sourceFormat,
|
||||
targetFormat,
|
||||
@@ -287,9 +368,75 @@ export async function handleChatCore({
|
||||
// Update model in body
|
||||
translatedBody.model = model;
|
||||
|
||||
// Strip unsupported parameters for reasoning models (o1, o3, etc.)
|
||||
const unsupported = getUnsupportedParams(provider, model);
|
||||
if (unsupported.length > 0) {
|
||||
const stripped: string[] = [];
|
||||
for (const param of unsupported) {
|
||||
if (Object.hasOwn(translatedBody, param)) {
|
||||
stripped.push(param);
|
||||
delete translatedBody[param];
|
||||
}
|
||||
}
|
||||
if (stripped.length > 0) {
|
||||
log?.warn?.("PARAMS", `Stripped unsupported params for ${model}: ${stripped.join(", ")}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Get executor for this provider
|
||||
const executor = getExecutor(provider);
|
||||
|
||||
// Create stream controller for disconnect detection
|
||||
const streamController = createStreamController({ onDisconnect, log, provider, model });
|
||||
|
||||
const dedupRequestBody = { ...translatedBody, model: `${provider}/${model}` };
|
||||
const dedupEnabled = shouldDeduplicate(dedupRequestBody);
|
||||
const dedupHash = dedupEnabled ? computeRequestHash(dedupRequestBody) : null;
|
||||
|
||||
const executeProviderRequest = async (modelToCall = model, allowDedup = false) => {
|
||||
const execute = async () => {
|
||||
const bodyToSend =
|
||||
translatedBody.model === modelToCall
|
||||
? translatedBody
|
||||
: { ...translatedBody, model: modelToCall };
|
||||
|
||||
const rawResult = await withRateLimit(provider, connectionId, modelToCall, () =>
|
||||
executor.execute({
|
||||
model: modelToCall,
|
||||
body: bodyToSend,
|
||||
stream,
|
||||
credentials,
|
||||
signal: streamController.signal,
|
||||
log,
|
||||
extendedContext,
|
||||
})
|
||||
);
|
||||
|
||||
if (stream) return rawResult;
|
||||
|
||||
// Non-stream responses need cloning for shared dedup consumers.
|
||||
const status = rawResult.response.status;
|
||||
const statusText = rawResult.response.statusText;
|
||||
const headers = Array.from(rawResult.response.headers.entries());
|
||||
const payload = await rawResult.response.text();
|
||||
|
||||
return {
|
||||
...rawResult,
|
||||
response: new Response(payload, { status, statusText, headers }),
|
||||
};
|
||||
};
|
||||
|
||||
if (allowDedup && dedupEnabled && dedupHash) {
|
||||
const dedupResult = await deduplicate(dedupHash, execute);
|
||||
if (dedupResult.wasDeduplicated) {
|
||||
log?.debug?.("DEDUP", `Joined in-flight request hash=${dedupHash}`);
|
||||
}
|
||||
return dedupResult.result;
|
||||
}
|
||||
|
||||
return execute();
|
||||
};
|
||||
|
||||
// Track pending request
|
||||
trackPendingRequest(model, provider, connectionId, true);
|
||||
|
||||
@@ -307,9 +454,6 @@ export async function handleChatCore({
|
||||
0;
|
||||
log?.debug?.("REQUEST", `${provider.toUpperCase()} | ${model} | ${msgCount} msgs`);
|
||||
|
||||
// Create stream controller for disconnect detection
|
||||
const streamController = createStreamController({ onDisconnect, log, provider, model });
|
||||
|
||||
// Execute request using executor (handles URL building, headers, fallback, transform)
|
||||
let providerResponse;
|
||||
let providerUrl;
|
||||
@@ -317,17 +461,7 @@ export async function handleChatCore({
|
||||
let finalBody;
|
||||
|
||||
try {
|
||||
const result = await withRateLimit(provider, connectionId, model, () =>
|
||||
executor.execute({
|
||||
model,
|
||||
body: translatedBody,
|
||||
stream,
|
||||
credentials,
|
||||
signal: streamController.signal,
|
||||
log,
|
||||
extendedContext,
|
||||
})
|
||||
);
|
||||
const result = await executeProviderRequest(model, true);
|
||||
|
||||
providerResponse = result.response;
|
||||
providerUrl = result.url;
|
||||
@@ -374,6 +508,7 @@ export async function handleChatCore({
|
||||
streamController.handleError(error);
|
||||
return createErrorResult(499, "Request aborted");
|
||||
}
|
||||
persistFailureUsage(HTTP_STATUS.BAD_GATEWAY, error?.name || "upstream_error");
|
||||
const errMsg = formatProviderError(error, provider, model, HTTP_STATUS.BAD_GATEWAY);
|
||||
console.log(`${COLORS.red}[ERROR] ${errMsg}${COLORS.reset}`);
|
||||
return createErrorResult(HTTP_STATUS.BAD_GATEWAY, errMsg);
|
||||
@@ -483,17 +618,7 @@ export async function handleChatCore({
|
||||
log?.info?.("MODEL_FALLBACK", `${model} unavailable (${statusCode}) → trying ${nextModel}`);
|
||||
// Re-execute with the fallback model
|
||||
try {
|
||||
const fallbackResult = await withRateLimit(provider, connectionId, nextModel, () =>
|
||||
executor.execute({
|
||||
model: nextModel,
|
||||
body: translatedBody,
|
||||
stream,
|
||||
credentials,
|
||||
signal: streamController.signal,
|
||||
log,
|
||||
extendedContext,
|
||||
})
|
||||
);
|
||||
const fallbackResult = await executeProviderRequest(nextModel, false);
|
||||
if (fallbackResult.response.ok) {
|
||||
providerResponse = fallbackResult.response;
|
||||
providerUrl = fallbackResult.url;
|
||||
@@ -505,18 +630,79 @@ export async function handleChatCore({
|
||||
// We fall through by NOT returning here
|
||||
} else {
|
||||
// Fallback also failed — return original error
|
||||
persistFailureUsage(statusCode, "model_unavailable");
|
||||
return createErrorResult(statusCode, errMsg, retryAfterMs);
|
||||
}
|
||||
} catch {
|
||||
persistFailureUsage(statusCode, "model_unavailable");
|
||||
return createErrorResult(statusCode, errMsg, retryAfterMs);
|
||||
}
|
||||
} else {
|
||||
persistFailureUsage(statusCode, "model_unavailable");
|
||||
return createErrorResult(statusCode, errMsg, retryAfterMs);
|
||||
}
|
||||
} else {
|
||||
persistFailureUsage(statusCode, `upstream_${statusCode}`);
|
||||
return createErrorResult(statusCode, errMsg, retryAfterMs);
|
||||
}
|
||||
// ── End T5 ───────────────────────────────────────────────────────────────
|
||||
|
||||
// ── Emergency Fallback (ClawRouter Feature #09/017) ────────────────────
|
||||
// When a non-streaming request fails with a budget-related error (402 or
|
||||
// budget keywords), redirect to nvidia/gpt-oss-120b ($0.00/M) before
|
||||
// returning the error to the combo router. This gives one last free-tier
|
||||
// attempt so the user's session stays alive.
|
||||
const requestHasTools = Array.isArray(translatedBody.tools) && translatedBody.tools.length > 0;
|
||||
if (!stream) {
|
||||
const fbDecision = shouldUseFallback(
|
||||
statusCode,
|
||||
message,
|
||||
requestHasTools,
|
||||
EMERGENCY_FALLBACK_CONFIG
|
||||
);
|
||||
if (isFallbackDecision(fbDecision)) {
|
||||
log?.info?.("EMERGENCY_FALLBACK", fbDecision.reason);
|
||||
try {
|
||||
// Build a minimal fallback request using the original body but with
|
||||
// the NVIDIA free-tier model and max_tokens capped to avoid overuse.
|
||||
const fbExecutor = getExecutor(fbDecision.provider);
|
||||
const fbResult = await fbExecutor.execute({
|
||||
model: fbDecision.model,
|
||||
body: {
|
||||
...translatedBody,
|
||||
model: fbDecision.model,
|
||||
max_tokens: Math.min(
|
||||
typeof translatedBody.max_tokens === "number"
|
||||
? translatedBody.max_tokens
|
||||
: fbDecision.maxOutputTokens,
|
||||
fbDecision.maxOutputTokens
|
||||
),
|
||||
},
|
||||
stream: false,
|
||||
credentials: credentials,
|
||||
signal: streamController.signal,
|
||||
log,
|
||||
extendedContext,
|
||||
});
|
||||
if (fbResult.response.ok) {
|
||||
providerResponse = fbResult.response;
|
||||
log?.info?.(
|
||||
"EMERGENCY_FALLBACK",
|
||||
`Serving ${fbDecision.provider}/${fbDecision.model} as budget fallback for ${provider}/${model}`
|
||||
);
|
||||
// Fall through to non-streaming handler — providerResponse is now OK
|
||||
} else {
|
||||
log?.warn?.(
|
||||
"EMERGENCY_FALLBACK",
|
||||
`Emergency fallback also failed (${fbResult.response.status})`
|
||||
);
|
||||
}
|
||||
} catch (fbErr) {
|
||||
log?.warn?.("EMERGENCY_FALLBACK", `Emergency fallback error: ${fbErr?.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// ── End Emergency Fallback ────────────────────────────────────────────
|
||||
}
|
||||
|
||||
// Non-streaming response
|
||||
@@ -542,6 +728,7 @@ export async function handleChatCore({
|
||||
connectionId,
|
||||
status: `FAILED ${HTTP_STATUS.BAD_GATEWAY}`,
|
||||
}).catch(() => {});
|
||||
persistFailureUsage(HTTP_STATUS.BAD_GATEWAY, "invalid_sse_payload");
|
||||
return createErrorResult(
|
||||
HTTP_STATUS.BAD_GATEWAY,
|
||||
"Invalid SSE response for non-streaming request"
|
||||
@@ -559,6 +746,7 @@ export async function handleChatCore({
|
||||
connectionId,
|
||||
status: `FAILED ${HTTP_STATUS.BAD_GATEWAY}`,
|
||||
}).catch(() => {});
|
||||
persistFailureUsage(HTTP_STATUS.BAD_GATEWAY, "invalid_json_payload");
|
||||
return createErrorResult(HTTP_STATUS.BAD_GATEWAY, "Invalid JSON response from provider");
|
||||
}
|
||||
}
|
||||
@@ -601,6 +789,11 @@ export async function handleChatCore({
|
||||
provider: provider || "unknown",
|
||||
model: model || "unknown",
|
||||
tokens: usage,
|
||||
status: "200",
|
||||
success: true,
|
||||
latencyMs: Date.now() - startTime,
|
||||
timeToFirstTokenMs: Date.now() - startTime,
|
||||
errorCode: null,
|
||||
timestamp: new Date().toISOString(),
|
||||
connectionId: connectionId || undefined,
|
||||
apiKeyId: apiKeyInfo?.id || undefined,
|
||||
|
||||
@@ -13,18 +13,48 @@
|
||||
* }
|
||||
*/
|
||||
|
||||
import { getEmbeddingProvider, parseEmbeddingModel } from "../config/embeddingRegistry.ts";
|
||||
import {
|
||||
getEmbeddingProvider,
|
||||
parseEmbeddingModel,
|
||||
type EmbeddingProvider,
|
||||
} from "../config/embeddingRegistry.ts";
|
||||
import { saveCallLog } from "@/lib/usageDb";
|
||||
|
||||
/**
|
||||
* Handle embedding request
|
||||
* @param {object} options
|
||||
* @param {object} options.body - Request body
|
||||
* @param {object} options.credentials - Provider credentials { apiKey, accessToken }
|
||||
* @param {object} options.log - Logger
|
||||
* Handle embedding request.
|
||||
* Supports both hardcoded cloud providers and dynamic local provider_nodes.
|
||||
* When resolvedProvider is passed, uses it directly (injection pattern from route handler).
|
||||
* Falls back to hardcoded registry lookup for backward compatibility.
|
||||
*/
|
||||
export async function handleEmbedding({ body, credentials, log }) {
|
||||
const { provider, model } = parseEmbeddingModel(body.model);
|
||||
export async function handleEmbedding({
|
||||
body,
|
||||
credentials,
|
||||
log,
|
||||
resolvedProvider = null,
|
||||
resolvedModel = null,
|
||||
}: {
|
||||
body: Record<string, unknown>;
|
||||
credentials: { apiKey?: string; accessToken?: string } | null;
|
||||
log?: { info: (...args: unknown[]) => void; error: (...args: unknown[]) => void };
|
||||
resolvedProvider?: EmbeddingProvider | null;
|
||||
resolvedModel?: string | null;
|
||||
}) {
|
||||
// Use pre-resolved provider/model from route handler if available (supports dynamic provider_nodes).
|
||||
let provider: string | null;
|
||||
let model: string | null;
|
||||
let providerConfig: EmbeddingProvider | null;
|
||||
|
||||
if (resolvedProvider) {
|
||||
provider = resolvedProvider.id;
|
||||
model = resolvedModel;
|
||||
providerConfig = resolvedProvider;
|
||||
} else {
|
||||
const parsed = parseEmbeddingModel(body.model as string);
|
||||
provider = parsed.provider;
|
||||
model = parsed.model;
|
||||
providerConfig = provider ? getEmbeddingProvider(provider) : null;
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
// Summarized request body for call log (avoid storing large embedding input arrays)
|
||||
@@ -42,7 +72,6 @@ export async function handleEmbedding({ body, credentials, log }) {
|
||||
};
|
||||
}
|
||||
|
||||
const providerConfig = getEmbeddingProvider(provider);
|
||||
if (!providerConfig) {
|
||||
return {
|
||||
success: false,
|
||||
@@ -66,11 +95,15 @@ export async function handleEmbedding({ body, credentials, log }) {
|
||||
"Content-Type": "application/json",
|
||||
};
|
||||
|
||||
const token = credentials.apiKey || credentials.accessToken;
|
||||
if (providerConfig.authHeader === "bearer") {
|
||||
headers["Authorization"] = `Bearer ${token}`;
|
||||
} else if (providerConfig.authHeader === "x-api-key") {
|
||||
headers["x-api-key"] = token;
|
||||
// Skip credential injection for local providers (authType: "none")
|
||||
const token =
|
||||
providerConfig.authType === "none" ? null : credentials?.apiKey || credentials?.accessToken;
|
||||
if (token) {
|
||||
if (providerConfig.authHeader === "bearer") {
|
||||
headers["Authorization"] = `Bearer ${token}`;
|
||||
} else if (providerConfig.authHeader === "x-api-key") {
|
||||
headers["x-api-key"] = token;
|
||||
}
|
||||
}
|
||||
|
||||
if (log) {
|
||||
|
||||
@@ -0,0 +1,664 @@
|
||||
/**
|
||||
* Search Handler
|
||||
*
|
||||
* Handles POST /v1/search requests.
|
||||
* Routes to 5 search providers with automatic failover:
|
||||
* serper-search, brave-search, perplexity-search, exa-search, tavily-search
|
||||
*
|
||||
* Request format:
|
||||
* {
|
||||
* "query": "search query",
|
||||
* "provider": "serper-search" | "brave-search" | ... // optional, auto-selects cheapest
|
||||
* "max_results": 5,
|
||||
* "search_type": "web" | "news"
|
||||
* }
|
||||
*/
|
||||
|
||||
import { getSearchProvider, type SearchProviderConfig } from "../config/searchRegistry.ts";
|
||||
import { saveCallLog } from "@/lib/usageDb";
|
||||
|
||||
// ── Types ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * One search hit in the provider-independent shape built by makeResult().
 */
export interface SearchResult {
  title: string;
  url: string;
  /** `url` without its scheme, a leading "www." and anything after the first "?". */
  display_url?: string;
  snippet: string;
  /** 1-based position of the hit in the provider's result list. */
  position: number;
  /** Provider score clamped into [0, 1]; null when the provider gave no numeric score. */
  score: number | null;
  published_at: string | null;
  favicon_url: string | null;
  /** Full text of the hit when the provider returned one; `length` is `text.length`. */
  content: { format: string; text: string; length: number } | null;
  metadata: {
    author: string | null;
    language: string | null;
    source_type: string | null;
    image_url: string | null;
  } | null;
  /** Provenance: which provider produced the hit, when, and at which 1-based rank. */
  citation: {
    provider: string;
    retrieved_at: string;
    rank: number;
  };
  provider_raw: Record<string, unknown> | null;
}
|
||||
|
||||
/**
 * Envelope describing the outcome of one search: the provider that answered,
 * the query, the normalized hits, plus usage, timing and error bookkeeping.
 */
export interface SearchResponse {
  provider: string;
  query: string;
  results: SearchResult[];
  answer: { source: string; text: string | null; model: string | null } | null;
  usage: { queries_used: number; search_cost_usd: number; llm_tokens?: number };
  metrics: {
    response_time_ms: number;
    upstream_latency_ms: number;
    gateway_latency_ms?: number;
    /** Total hit count reported by the provider; null when it reports none. */
    total_results_available: number | null;
  };
  errors: Array<{ provider: string; code: string; message: string }>;
}
|
||||
|
||||
/**
 * Result returned by the search handler.
 * Failures carry an HTTP-style `status` and an `error` message;
 * `data` is presumably populated on success (confirm against tryProvider).
 */
interface SearchHandlerResult {
  success: boolean;
  status?: number;
  error?: string;
  data?: SearchResponse;
}
|
||||
|
||||
/**
 * Input accepted by handleSearch().
 *
 * `provider` / `credentials` describe the primary provider; the optional
 * `alternateProvider` / `alternateCredentials` pair describes the failover
 * provider that is tried after a retriable primary failure.
 */
interface SearchHandlerOptions {
  query: string;
  provider: string;
  maxResults: number;
  searchType: string;
  country?: string;
  language?: string;
  timeRange?: string;
  offset?: number;
  /** Domains to search; entries prefixed with "-" are treated as exclusions. */
  domainFilter?: string[];
  contentOptions?: {
    snippet?: boolean;
    full_page?: boolean;
    format?: string;
    max_characters?: number;
  };
  strictFilters?: boolean;
  providerOptions?: Record<string, unknown>;
  credentials: Record<string, any>;
  alternateProvider?: string;
  alternateCredentials?: Record<string, any> | null;
  log?: any;
}
|
||||
|
||||
// ── Constants ────────────────────────────────────────────────────────────
|
||||
|
||||
const GLOBAL_TIMEOUT_MS = 15_000;
|
||||
|
||||
// Non-retriable HTTP status codes — fail immediately, don't try alternate
|
||||
const NON_RETRIABLE = new Set([400, 401, 403, 404]);
|
||||
|
||||
// ── Input Sanitization ──────────────────────────────────────────────────
|
||||
|
||||
// Control characters that should never appear in search queries
|
||||
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/;

/**
 * Validate and normalize a raw search query.
 *
 * The raw string is rejected when it contains an ASCII control character
 * other than tab, line feed or carriage return. Otherwise it is NFKC
 * normalized, trimmed, and every whitespace run is collapsed to one space.
 *
 * @param query Raw query text.
 * @returns `{ clean }` on success, or `{ clean: "", error }` when the query
 *          is rejected or ends up empty.
 */
function sanitizeQuery(query: string): { clean: string; error?: string } {
  const hasControlChars = CONTROL_CHAR_RE.test(query);
  if (hasControlChars) {
    return { clean: "", error: "Query contains invalid control characters" };
  }

  const normalized = query.normalize("NFKC").trim().replace(/\s+/g, " ");
  return normalized.length > 0
    ? { clean: normalized }
    : { clean: "", error: "Query is empty after normalization" };
}
|
||||
|
||||
// ── Response Normalizers ────────────────────────────────────────────────
|
||||
|
||||
/**
 * Build a normalized SearchResult from the fields a provider supplied.
 *
 * Missing text fields become "" (title, url, snippet) or null (everything
 * else); a numeric score is clamped into [0, 1]; `content` is only filled
 * when `full_text` is non-empty.
 *
 * @param providerId Provider id recorded in the citation.
 * @param item       Provider fields already mapped to common names.
 * @param idx        Zero-based index of the item; exposed as a 1-based position/rank.
 * @param now        Timestamp string recorded as `citation.retrieved_at`.
 */
function makeResult(
  providerId: string,
  item: {
    title?: string;
    url?: string;
    snippet?: string;
    score?: number;
    published_at?: string;
    favicon_url?: string;
    author?: string;
    source_type?: string;
    image_url?: string;
    full_text?: string;
    text_format?: string;
  },
  idx: number,
  now: string
): SearchResult {
  const rank = idx + 1;
  const link = item.url || "";

  // Human-friendly URL: strip scheme, optional "www." and the query string.
  let displayUrl: string | undefined;
  if (link) {
    displayUrl = link.replace(/^https?:\/\/(www\.)?/, "").split("?")[0];
  }

  let clampedScore: number | null = null;
  if (typeof item.score === "number") {
    clampedScore = Math.min(1, Math.max(0, item.score));
  }

  let content: SearchResult["content"] = null;
  if (item.full_text) {
    content = {
      format: item.text_format || "text",
      text: item.full_text,
      length: item.full_text.length,
    };
  }

  return {
    title: item.title || "",
    url: link,
    display_url: displayUrl,
    snippet: item.snippet || "",
    position: rank,
    score: clampedScore,
    published_at: item.published_at || null,
    favicon_url: item.favicon_url || null,
    content,
    metadata: {
      author: item.author || null,
      language: null,
      source_type: item.source_type || null,
      image_url: item.image_url || null,
    },
    citation: { provider: providerId, retrieved_at: now, rank },
    provider_raw: null,
  };
}
|
||||
|
||||
/**
 * Normalize a Serper payload.
 *
 * Hits are read from `data.news` for "news" searches and from `data.organic`
 * otherwise. When that field is not an array the result list is empty and the
 * total is null. The total is taken from `data.searchParameters.totalResults`
 * only when it is a number.
 */
function normalizeSerperResponse(
  data: any,
  _query: string,
  searchType: string
): { results: SearchResult[]; totalResults: number | null } {
  const retrievedAt = new Date().toISOString();
  const rows = searchType === "news" ? data.news : data.organic;
  if (!Array.isArray(rows)) {
    return { results: [], totalResults: null };
  }

  const toResult = (row: any, index: number): SearchResult => {
    const fields = {
      title: row.title,
      url: row.link,
      snippet: row.snippet || row.description,
      published_at: row.date,
    };
    return makeResult("serper-search", fields, index, retrievedAt);
  };

  const reportedTotal = data.searchParameters?.totalResults;
  return {
    results: rows.map(toResult),
    totalResults: typeof reportedTotal === "number" ? reportedTotal : null,
  };
}
|
||||
|
||||
/**
 * Normalize a Brave Search payload.
 *
 * Web searches carry their hits under `data.web.results`. News searches are
 * sent to the dedicated news endpoint, whose payload puts `results` at the top
 * level; a nested `data.news.results` (the shape news hits take inside a web
 * search payload) is still honoured when present.
 *
 * @param data       Parsed JSON body from Brave (may be null/undefined).
 * @param _query     Unused; kept for a uniform normalizer signature.
 * @param searchType "news" selects the news layout, anything else the web layout.
 * @returns Normalized hits plus `totalCount` of the chosen container, or null
 *          when the container does not report one.
 */
function normalizeBraveResponse(
  data: any,
  _query: string,
  searchType: string
): { results: SearchResult[]; totalResults: number | null } {
  const now = new Date().toISOString();

  let container: any;
  if (searchType === "news") {
    // Prefer the nested container; otherwise fall back to the top-level
    // `results` list returned by the news endpoint.
    container = Array.isArray(data?.news?.results) ? data.news : data;
  } else {
    container = data?.web;
  }

  const items = container?.results;
  if (!Array.isArray(items)) return { results: [], totalResults: null };

  const results = items.map((item: any, idx: number) =>
    makeResult(
      "brave-search",
      {
        title: item.title,
        url: item.url,
        snippet: item.description,
        published_at: item.page_age || item.age,
        favicon_url: item.meta_url?.favicon || item.favicon,
      },
      idx,
      now
    )
  );

  return { results, totalResults: container?.totalCount ?? null };
}
|
||||
|
||||
// ── Helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Split a domain filter list into domains to include and domains to exclude.
 * Entries starting with "-" are exclusions and are returned without the dash;
 * every other entry is an inclusion. A missing or empty list yields two empty
 * arrays.
 */
function parseDomainFilter(domainFilter?: string[]): {
  includes: string[];
  excludes: string[];
} {
  const includes: string[] = [];
  const excludes: string[] = [];

  if (domainFilter?.length) {
    for (const domain of domainFilter) {
      if (domain.startsWith("-")) {
        excludes.push(domain.slice(1));
      } else {
        includes.push(domain);
      }
    }
  }

  return { includes, excludes };
}
|
||||
|
||||
// ── Provider Request Builders ───────────────────────────────────────────
|
||||
|
||||
/**
 * Provider-independent parameters handed to the per-provider request builders.
 */
interface SearchRequestParams {
  query: string;
  searchType: string;
  maxResults: number;
  /** Credential placed in the provider's auth header. */
  token: string;
  country?: string;
  language?: string;
  /** Domains to search; entries prefixed with "-" are treated as exclusions. */
  domainFilter?: string[];
}
|
||||
|
||||
/**
 * Build the Serper request: a JSON POST to `/news` for news searches and to
 * `/search` otherwise, authenticated through the `X-API-Key` header.
 * `country` is lower-cased into `gl`; `language` is passed through as `hl`.
 */
function buildSerperRequest(
  config: SearchProviderConfig,
  params: SearchRequestParams
): { url: string; init: RequestInit } {
  const path = params.searchType === "news" ? "/news" : "/search";

  const payload: Record<string, unknown> = {
    q: params.query,
    num: params.maxResults,
    ...(params.country ? { gl: params.country.toLowerCase() } : {}),
    ...(params.language ? { hl: params.language } : {}),
  };

  const init: RequestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json", "X-API-Key": params.token },
    body: JSON.stringify(payload),
  };

  return { url: `${config.baseUrl}${path}`, init };
}
|
||||
|
||||
/**
 * Build the Brave request: a GET to `/news/search` for news searches and to
 * `/web/search` otherwise, with the search parameters in the query string and
 * the credential in the `X-Subscription-Token` header.
 */
function buildBraveRequest(
  config: SearchProviderConfig,
  params: SearchRequestParams
): { url: string; init: RequestInit } {
  const path = params.searchType === "news" ? "/news/search" : "/web/search";

  const search = new URLSearchParams({
    q: params.query,
    count: String(params.maxResults),
  });
  if (params.country) {
    search.set("country", params.country);
  }
  if (params.language) {
    search.set("search_lang", params.language);
  }

  const init: RequestInit = {
    method: "GET",
    headers: { Accept: "application/json", "X-Subscription-Token": params.token },
  };

  return { url: `${config.baseUrl}${path}?${search.toString()}`, init };
}
|
||||
|
||||
/**
 * Build the Perplexity request: a JSON POST to the configured base URL with
 * bearer authentication. `country`, `language` (wrapped in a one-element
 * list) and a non-empty `domainFilter` are added only when provided.
 */
function buildPerplexityRequest(
  config: SearchProviderConfig,
  params: SearchRequestParams
): { url: string; init: RequestInit } {
  const { query, maxResults, country, language, domainFilter, token } = params;

  const payload: Record<string, unknown> = {
    query,
    max_results: maxResults,
    ...(country ? { country } : {}),
    ...(language ? { search_language_filter: [language] } : {}),
    ...(domainFilter?.length ? { search_domain_filter: domainFilter } : {}),
  };

  const init: RequestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json", Authorization: `Bearer ${token}` },
    body: JSON.stringify(payload),
  };

  return { url: config.baseUrl, init };
}
|
||||
|
||||
/**
 * Build the Exa request: a JSON POST to the configured base URL with the
 * credential in the `x-api-key` header. Text and highlights are always
 * requested; include/exclude domain lists and the "news" category are added
 * only when they apply.
 */
function buildExaRequest(
  config: SearchProviderConfig,
  params: SearchRequestParams
): { url: string; init: RequestInit } {
  const domains = parseDomainFilter(params.domainFilter);

  const payload: Record<string, unknown> = {
    query: params.query,
    numResults: params.maxResults,
    type: "auto",
    text: true,
    highlights: true,
    ...(domains.includes.length ? { includeDomains: domains.includes } : {}),
    ...(domains.excludes.length ? { excludeDomains: domains.excludes } : {}),
    ...(params.searchType === "news" ? { category: "news" } : {}),
  };

  const init: RequestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json", "x-api-key": params.token },
    body: JSON.stringify(payload),
  };

  return { url: config.baseUrl, init };
}
|
||||
|
||||
/**
 * Build the Tavily request: a JSON POST to the configured base URL with
 * bearer authentication. The topic is "news" for news searches and "general"
 * otherwise; domain lists and `country` are added only when provided.
 */
function buildTavilyRequest(
  config: SearchProviderConfig,
  params: SearchRequestParams
): { url: string; init: RequestInit } {
  const domains = parseDomainFilter(params.domainFilter);
  const topic = params.searchType === "news" ? "news" : "general";

  const payload: Record<string, unknown> = {
    query: params.query,
    max_results: params.maxResults,
    topic,
    ...(domains.includes.length ? { include_domains: domains.includes } : {}),
    ...(domains.excludes.length ? { exclude_domains: domains.excludes } : {}),
    ...(params.country ? { country: params.country } : {}),
  };

  const init: RequestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json", Authorization: `Bearer ${params.token}` },
    body: JSON.stringify(payload),
  };

  return { url: config.baseUrl, init };
}
|
||||
|
||||
/**
 * Build the HTTP request for a search provider.
 *
 * Known provider ids are delegated to their dedicated builders. Any other
 * provider gets a generic bearer-authenticated request:
 *  - for GET/HEAD the parameters travel in the query string, because a
 *    request with one of those methods must not carry a body (fetch rejects
 *    it with a TypeError);
 *  - for every other method the parameters are sent as a JSON body.
 * When the provider config declares no method, POST is used.
 *
 * @param config Provider configuration (id, baseUrl, method).
 * @param params Provider-independent search parameters, including the token.
 * @returns The URL to call and the matching fetch init object.
 */
function buildRequest(
  config: SearchProviderConfig,
  params: SearchRequestParams
): { url: string; init: RequestInit } {
  switch (config.id) {
    case "serper-search":
      return buildSerperRequest(config, params);
    case "brave-search":
      return buildBraveRequest(config, params);
    case "perplexity-search":
      return buildPerplexityRequest(config, params);
    case "exa-search":
      return buildExaRequest(config, params);
    case "tavily-search":
      return buildTavilyRequest(config, params);
    default:
      break;
  }

  // Fallback for future providers: bearer auth, POST unless configured otherwise.
  const method = config.method ?? "POST";
  const bodyless = ["GET", "HEAD"].includes(String(method).toUpperCase());

  if (bodyless) {
    const qp = new URLSearchParams({
      query: params.query,
      max_results: String(params.maxResults),
      search_type: params.searchType,
    });
    // Respect a base URL that already carries a query string.
    const separator = config.baseUrl.includes("?") ? "&" : "?";
    return {
      url: `${config.baseUrl}${separator}${qp.toString()}`,
      init: {
        method,
        headers: { Accept: "application/json", Authorization: `Bearer ${params.token}` },
      },
    };
  }

  return {
    url: config.baseUrl,
    init: {
      method,
      headers: { "Content-Type": "application/json", Authorization: `Bearer ${params.token}` },
      body: JSON.stringify({
        query: params.query,
        max_results: params.maxResults,
        search_type: params.searchType,
      }),
    },
  };
}
|
||||
|
||||
/**
 * Normalize a Perplexity payload. Hits come from `data.results`; when that is
 * not an array the result list is empty and the total is null. Otherwise the
 * total is simply the number of hits returned.
 */
function normalizePerplexityResponse(
  data: any,
  _query: string,
  _searchType: string
): { results: SearchResult[]; totalResults: number | null } {
  const retrievedAt = new Date().toISOString();
  const rows = data.results;
  if (!Array.isArray(rows)) {
    return { results: [], totalResults: null };
  }

  const toResult = (row: any, index: number): SearchResult => {
    const fields = {
      title: row.title,
      url: row.url,
      snippet: row.snippet,
      published_at: row.date || row.last_updated,
    };
    return makeResult("perplexity-search", fields, index, retrievedAt);
  };

  const results = rows.map(toResult);
  return { results, totalResults: results.length };
}
|
||||
|
||||
/**
 * Normalize an Exa payload. Hits come from `data.results`; when that is not
 * an array the result list is empty and the total is null. The snippet is the
 * first highlight, else the first 300 characters of the text, else "". The
 * full text is forwarded as plain-text content.
 */
function normalizeExaResponse(
  data: any,
  _query: string,
  _searchType: string
): { results: SearchResult[]; totalResults: number | null } {
  const retrievedAt = new Date().toISOString();
  const rows = data.results;
  if (!Array.isArray(rows)) {
    return { results: [], totalResults: null };
  }

  const toResult = (row: any, index: number): SearchResult => {
    const fields = {
      title: row.title,
      url: row.url,
      snippet: row.highlights?.[0] || row.text?.slice(0, 300) || "",
      score: row.score,
      published_at: row.publishedDate,
      favicon_url: row.favicon,
      author: row.author,
      image_url: row.image,
      full_text: row.text,
      text_format: "text",
    };
    return makeResult("exa-search", fields, index, retrievedAt);
  };

  const results = rows.map(toResult);
  return { results, totalResults: results.length };
}
|
||||
|
||||
/**
 * Normalize a Tavily payload. Hits come from `data.results`; when that is not
 * an array the result list is empty and the total is null. `content` becomes
 * the snippet and `raw_content` is forwarded as plain-text full content.
 */
function normalizeTavilyResponse(
  data: any,
  _query: string,
  _searchType: string
): { results: SearchResult[]; totalResults: number | null } {
  const retrievedAt = new Date().toISOString();
  const rows = data.results;
  if (!Array.isArray(rows)) {
    return { results: [], totalResults: null };
  }

  const toResult = (row: any, index: number): SearchResult => {
    const fields = {
      title: row.title,
      url: row.url,
      snippet: row.content || "",
      score: row.score,
      published_at: row.published_date,
      full_text: row.raw_content,
      text_format: "text",
    };
    return makeResult("tavily-search", fields, index, retrievedAt);
  };

  const results = rows.map(toResult);
  return { results, totalResults: results.length };
}
|
||||
|
||||
/**
 * Dispatch a provider payload to the normalizer registered for `providerId`.
 * Unknown provider ids yield an empty result list with a null total.
 */
function normalizeResponse(
  providerId: string,
  data: any,
  query: string,
  searchType: string
): { results: SearchResult[]; totalResults: number | null } {
  switch (providerId) {
    case "serper-search":
      return normalizeSerperResponse(data, query, searchType);
    case "brave-search":
      return normalizeBraveResponse(data, query, searchType);
    case "perplexity-search":
      return normalizePerplexityResponse(data, query, searchType);
    case "exa-search":
      return normalizeExaResponse(data, query, searchType);
    case "tavily-search":
      return normalizeTavilyResponse(data, query, searchType);
    default:
      return { results: [], totalResults: null };
  }
}
|
||||
|
||||
// ── Main Handler ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run a search against the primary provider, failing over to the alternate
 * provider when possible.
 *
 * Flow:
 *  1. Sanitize the query; a rejected query returns status 400.
 *  2. Look up the primary provider config; an unknown id returns status 400.
 *  3. Try the primary provider and return its result on success.
 *  4. Try the alternate provider only when an alternate config and alternate
 *     credentials exist, the primary status is not in NON_RETRIABLE, and less
 *     than GLOBAL_TIMEOUT_MS has elapsed since the handler started.
 *  5. If the alternate does not succeed either, return the primary failure.
 *
 * @param options Search parameters, credentials and optional logger.
 * @returns The first successful provider result, otherwise the primary failure.
 */
export async function handleSearch(options: SearchHandlerOptions): Promise<SearchHandlerResult> {
  const { credentials, alternateProvider, alternateCredentials, log } = options;
  const startTime = Date.now();

  // 1. Sanitize input
  const sanitized = sanitizeQuery(options.query);
  if (sanitized.error) {
    return { success: false, status: 400, error: sanitized.error };
  }

  // 2. Use resolved provider from route (no re-resolution)
  const primaryConfig = getSearchProvider(options.provider);
  if (!primaryConfig) {
    return {
      success: false,
      status: 400,
      error: `Unknown search provider: ${options.provider}`,
    };
  }

  // 3. Get alternate config for failover (pre-resolved by route)
  const alternateConfig = alternateProvider ? getSearchProvider(alternateProvider) : null;

  const requestParams = {
    query: sanitized.clean,
    searchType: options.searchType,
    maxResults: options.maxResults,
    country: options.country,
    language: options.language,
    domainFilter: options.domainFilter,
  };

  // 4. Try primary provider
  const primaryOutcome = await tryProvider(
    primaryConfig,
    requestParams,
    credentials,
    startTime,
    log
  );
  if (primaryOutcome.success) {
    return primaryOutcome;
  }

  // 5. Failover to alternate (only for retriable errors and auto-select mode)
  if (!alternateConfig || !alternateCredentials) {
    return primaryOutcome;
  }
  if (NON_RETRIABLE.has(primaryOutcome.status || 0)) {
    return primaryOutcome;
  }
  if (Date.now() - startTime >= GLOBAL_TIMEOUT_MS) {
    return primaryOutcome;
  }

  log?.warn(
    "SEARCH",
    `${primaryConfig.id} failed (${primaryOutcome.status}), trying ${alternateConfig.id}`
  );

  const alternateOutcome = await tryProvider(
    alternateConfig,
    requestParams,
    alternateCredentials,
    startTime,
    log
  );

  // A failed alternate is discarded: the caller sees the primary failure.
  return alternateOutcome.success ? alternateOutcome : primaryOutcome;
}
|
||||
|
||||
/**
 * Sends one search request to a single provider and converts the outcome into
 * a `SearchHandlerResult`. Every attempt (success, HTTP error, timeout or
 * network failure) is recorded through `saveCallLog` on a best-effort basis.
 *
 * @param config - Provider configuration (id, HTTP method, timeout, cost).
 * @param params - Search parameters without the auth token.
 * @param credentials - Credential bag; `apiKey` is preferred over `accessToken`.
 * @param globalStartTime - Timestamp (ms) at which the overall search started;
 *                          used to derive the remaining global time budget.
 * @param log - Optional logger exposing `info` / `error`.
 * @returns A success result with normalized data, or a failure carrying the
 *          upstream status (401 when no credential is available, 504 on
 *          timeout, 502 on any other fetch/parse error).
 */
async function tryProvider(
  config: SearchProviderConfig,
  params: Omit<SearchRequestParams, "token">,
  credentials: Record<string, any>,
  globalStartTime: number,
  log?: any
): Promise<SearchHandlerResult> {
  const startTime = Date.now();
  const token = credentials.apiKey || credentials.accessToken;

  if (!token) {
    return {
      success: false,
      status: 401,
      error: `No credentials for search provider: ${config.id}`,
    };
  }

  const { query, searchType, maxResults } = params;
  const { url, init } = buildRequest(config, { ...params, token });

  // Timeout: min of provider timeout and remaining global timeout.
  // The remaining budget is floored at 1000 ms.
  const remainingGlobal = GLOBAL_TIMEOUT_MS - (Date.now() - globalStartTime);
  const timeout = Math.min(config.timeoutMs, Math.max(remainingGlobal, 1000));
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);

  // Fields shared by every call-log entry written for this attempt.
  const requestBody = {
    query: query.slice(0, 200),
    search_type: searchType,
    max_results: maxResults,
  };
  const recordCall = (entry: Record<string, any>): void => {
    saveCallLog({
      method: config.method,
      path: "/v1/search",
      model: config.id,
      provider: config.id,
      requestType: "search",
      requestBody,
      ...entry,
    }).catch(() => {
      /* non-critical — logging must not block search response */
    });
  };

  if (log) {
    log.info("SEARCH", `${config.id} | query: "${query.slice(0, 80)}" | type: ${searchType}`);
  }

  try {
    const response = await fetch(url, { ...init, signal: controller.signal });

    if (!response.ok) {
      const errorText = await response.text();
      if (log) {
        log.error("SEARCH", `${config.id} error ${response.status}: ${errorText.slice(0, 200)}`);
      }

      recordCall({
        status: response.status,
        duration: Date.now() - startTime,
        error: errorText.slice(0, 500),
      });

      return {
        success: false,
        status: response.status,
        error: `Search provider ${config.id} returned ${response.status}`,
      };
    }

    const data = await response.json();
    const { results, totalResults } = normalizeResponse(config.id, data, query, searchType);
    const duration = Date.now() - startTime;

    recordCall({
      status: 200,
      duration,
      tokens: { prompt_tokens: 0, completion_tokens: 0 },
      responseBody: { results_count: results.length, cached: false },
    });

    return {
      success: true,
      data: {
        provider: config.id,
        query,
        results,
        answer: null,
        usage: { queries_used: 1, search_cost_usd: config.costPerQuery },
        metrics: {
          response_time_ms: duration,
          upstream_latency_ms: duration,
          total_results_available: totalResults,
        },
        errors: [],
      },
    };
  } catch (err: any) {
    // An abort raised by the timer surfaces as an AbortError, both while
    // waiting for headers and while reading the body.
    const isTimeout = err?.name === "AbortError";
    const message = err?.message ?? String(err);
    const status = isTimeout ? 504 : 502;

    if (log) {
      log.error("SEARCH", `${config.id} ${isTimeout ? "timeout" : "fetch error"}: ${message}`);
    }

    recordCall({
      status,
      duration: Date.now() - startTime,
      error: message,
    });

    return {
      success: false,
      status,
      error: `Search provider ${isTimeout ? "timeout" : "error"}: ${message}`,
    };
  } finally {
    // Keep the timer armed until the body has been fully read (or the attempt
    // failed); clearing it right after fetch() resolved left response.text()
    // and response.json() without any timeout.
    clearTimeout(timer);
  }
}
|
||||
@@ -0,0 +1,48 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
MCP_TOOLS,
|
||||
MCP_TOOL_MAP,
|
||||
setRoutingStrategyInput,
|
||||
setRoutingStrategyTool,
|
||||
} from "../schemas/tools.ts";
|
||||
|
||||
// Schema-level checks for the omniroute_set_routing_strategy MCP tool:
// registration, scope requirement and input validation.
describe("omniroute_set_routing_strategy MCP tool schema", () => {
  const TOOL_NAME = "omniroute_set_routing_strategy";

  // Reports whether the input schema accepts the given payload.
  const accepts = (payload: Record<string, unknown>): boolean =>
    setRoutingStrategyInput.safeParse(payload).success;

  it("should be registered in MCP_TOOLS", () => {
    const registered = MCP_TOOLS.find(({ name }) => name === TOOL_NAME);
    expect(registered).toBeDefined();
    expect(registered?.phase).toBe(2);
  });

  it("should be available in MCP_TOOL_MAP", () => {
    expect(MCP_TOOL_MAP[TOOL_NAME]).toBeDefined();
  });

  it("should require write:combos scope", () => {
    expect(setRoutingStrategyTool.scopes).toContain("write:combos");
  });

  it("should validate a standard strategy payload", () => {
    expect(accepts({ comboId: "my-combo", strategy: "cost-optimized" })).toBe(true);
  });

  it("should validate auto strategy with autoRoutingStrategy", () => {
    expect(
      accepts({ comboId: "my-combo", strategy: "auto", autoRoutingStrategy: "latency" })
    ).toBe(true);
  });

  it("should reject unknown strategy", () => {
    expect(accepts({ comboId: "my-combo", strategy: "unknown-strategy" })).toBe(false);
  });
});
|
||||
@@ -107,6 +107,7 @@ export const listCombosOutput = z.object({
|
||||
"priority",
|
||||
"weighted",
|
||||
"round-robin",
|
||||
"strict-random",
|
||||
"random",
|
||||
"least-used",
|
||||
"cost-optimized",
|
||||
@@ -470,7 +471,53 @@ export const setBudgetGuardTool: McpToolDefinition<
|
||||
sourceEndpoints: ["/api/usage/budget"],
|
||||
};
|
||||
|
||||
// --- Tool 11: omniroute_set_resilience_profile ---
|
||||
// --- Tool 11: omniroute_set_routing_strategy ---
|
||||
/**
 * Input schema for omniroute_set_routing_strategy.
 *
 * - `comboId`: combo id or name.
 * - `strategy`: one of the supported combo routing strategies.
 * - `autoRoutingStrategy`: optional sub-strategy, documented as only used
 *   when `strategy` is "auto".
 */
export const setRoutingStrategyInput = z.object({
  comboId: z.string().describe("Combo ID or name to update"),
  strategy: z
    .enum([
      "priority",
      "weighted",
      "round-robin",
      "strict-random",
      "random",
      "least-used",
      "cost-optimized",
      "auto",
    ])
    .describe("Routing strategy to apply"),
  autoRoutingStrategy: z
    .enum(["rules", "cost", "eco", "latency", "fast"])
    .optional()
    .describe("Optional strategy used by auto mode (only used when strategy='auto')"),
});
|
||||
|
||||
/**
 * Output schema for omniroute_set_routing_strategy: a success flag plus the
 * updated combo's id, name, strategy and (nullable) auto sub-strategy.
 */
export const setRoutingStrategyOutput = z.object({
  success: z.boolean(),
  combo: z.object({
    id: z.string(),
    name: z.string(),
    strategy: z.string(),
    autoRoutingStrategy: z.string().nullable(),
  }),
});
|
||||
|
||||
/**
 * MCP tool definition for omniroute_set_routing_strategy: a phase-2 tool that
 * requires the `write:combos` scope and is audited at the "full" level.
 */
export const setRoutingStrategyTool: McpToolDefinition<
  typeof setRoutingStrategyInput,
  typeof setRoutingStrategyOutput
> = {
  name: "omniroute_set_routing_strategy",
  description:
    "Updates a combo routing strategy (priority/weighted/auto/etc.) at runtime. Supports selecting the sub-strategy used by auto mode (rules/cost/latency).",
  inputSchema: setRoutingStrategyInput,
  outputSchema: setRoutingStrategyOutput,
  scopes: ["write:combos"],
  auditLevel: "full",
  phase: 2,
  // Endpoints this tool reads from and writes to.
  sourceEndpoints: ["/api/combos", "/api/combos/{id}"],
};
|
||||
|
||||
// --- Tool 12: omniroute_set_resilience_profile ---
|
||||
export const setResilienceProfileInput = z.object({
|
||||
profile: z
|
||||
.enum(["aggressive", "balanced", "conservative"])
|
||||
@@ -502,7 +549,7 @@ export const setResilienceProfileTool: McpToolDefinition<
|
||||
sourceEndpoints: ["/api/resilience"],
|
||||
};
|
||||
|
||||
// --- Tool 12: omniroute_test_combo ---
|
||||
// --- Tool 13: omniroute_test_combo ---
|
||||
export const testComboInput = z.object({
|
||||
comboId: z.string().describe("ID of the combo to test"),
|
||||
testPrompt: z.string().max(500).describe("Short test prompt (max 500 chars)"),
|
||||
@@ -540,7 +587,7 @@ export const testComboTool: McpToolDefinition<typeof testComboInput, typeof test
|
||||
sourceEndpoints: ["/api/combos/test", "/v1/chat/completions"],
|
||||
};
|
||||
|
||||
// --- Tool 13: omniroute_get_provider_metrics ---
|
||||
// --- Tool 14: omniroute_get_provider_metrics ---
|
||||
export const getProviderMetricsInput = z.object({
|
||||
provider: z.string().describe("Provider name (e.g., 'claude', 'gemini-cli', 'codex')"),
|
||||
});
|
||||
@@ -583,7 +630,7 @@ export const getProviderMetricsTool: McpToolDefinition<
|
||||
sourceEndpoints: ["/api/provider-metrics", "/api/resilience"],
|
||||
};
|
||||
|
||||
// --- Tool 14: omniroute_best_combo_for_task ---
|
||||
// --- Tool 15: omniroute_best_combo_for_task ---
|
||||
export const bestComboForTaskInput = z.object({
|
||||
taskType: z
|
||||
.enum(["coding", "review", "planning", "analysis", "debugging", "documentation"])
|
||||
@@ -628,7 +675,7 @@ export const bestComboForTaskTool: McpToolDefinition<
|
||||
sourceEndpoints: ["/api/combos", "/api/combos/metrics", "/api/monitoring/health"],
|
||||
};
|
||||
|
||||
// --- Tool 15: omniroute_explain_route ---
|
||||
// --- Tool 16: omniroute_explain_route ---
|
||||
export const explainRouteInput = z.object({
|
||||
requestId: z.string().describe("Request ID from the X-Request-Id header"),
|
||||
});
|
||||
@@ -674,7 +721,7 @@ export const explainRouteTool: McpToolDefinition<
|
||||
sourceEndpoints: [],
|
||||
};
|
||||
|
||||
// --- Tool 16: omniroute_get_session_snapshot ---
|
||||
// --- Tool 17: omniroute_get_session_snapshot ---
|
||||
export const getSessionSnapshotInput = z.object({}).describe("No parameters required");
|
||||
|
||||
export const getSessionSnapshotOutput = z.object({
|
||||
@@ -723,7 +770,7 @@ export const getSessionSnapshotTool: McpToolDefinition<
|
||||
sourceEndpoints: ["/api/usage/analytics", "/api/telemetry/summary"],
|
||||
};
|
||||
|
||||
// --- Tool 17: omniroute_sync_pricing ---
|
||||
// --- Tool 18: omniroute_sync_pricing ---
|
||||
export const syncPricingInput = z.object({
|
||||
sources: z
|
||||
.array(z.string())
|
||||
@@ -775,6 +822,7 @@ export const MCP_TOOLS = [
|
||||
// Phase 2: Advanced
|
||||
simulateRouteTool,
|
||||
setBudgetGuardTool,
|
||||
setRoutingStrategyTool,
|
||||
setResilienceProfileTool,
|
||||
testComboTool,
|
||||
getProviderMetricsTool,
|
||||
|
||||
@@ -25,6 +25,7 @@ import {
|
||||
listModelsCatalogInput,
|
||||
simulateRouteInput,
|
||||
setBudgetGuardInput,
|
||||
setRoutingStrategyInput,
|
||||
setResilienceProfileInput,
|
||||
testComboInput,
|
||||
getProviderMetricsInput,
|
||||
@@ -45,6 +46,7 @@ import {
|
||||
import {
|
||||
handleSimulateRoute,
|
||||
handleSetBudgetGuard,
|
||||
handleSetRoutingStrategy,
|
||||
handleSetResilienceProfile,
|
||||
handleTestCombo,
|
||||
handleGetProviderMetrics,
|
||||
@@ -593,6 +595,18 @@ export function createMcpServer(): McpServer {
|
||||
)
|
||||
);
|
||||
|
||||
server.registerTool(
|
||||
"omniroute_set_routing_strategy",
|
||||
{
|
||||
description:
|
||||
"Updates combo routing strategy at runtime (priority/weighted/round-robin/auto/etc.)",
|
||||
inputSchema: setRoutingStrategyInput,
|
||||
},
|
||||
withScopeEnforcement("omniroute_set_routing_strategy", (args) =>
|
||||
handleSetRoutingStrategy(setRoutingStrategyInput.parse(args))
|
||||
)
|
||||
);
|
||||
|
||||
server.registerTool(
|
||||
"omniroute_set_resilience_profile",
|
||||
{
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
/**
|
||||
* OmniRoute MCP Advanced Tools — 8 intelligence tools that differentiate
|
||||
* OmniRoute MCP Advanced Tools — 10 intelligence tools that differentiate
|
||||
* OmniRoute from all other AI gateways.
|
||||
*
|
||||
* Tools:
|
||||
* 1. omniroute_simulate_route — Dry-run routing simulation
|
||||
* 2. omniroute_set_budget_guard — Session budget with degrade/block/alert
|
||||
* 3. omniroute_set_resilience_profile — Circuit breaker/retry profiles
|
||||
* 4. omniroute_test_combo — Live test each provider in a combo
|
||||
* 5. omniroute_get_provider_metrics — Detailed per-provider metrics
|
||||
* 6. omniroute_best_combo_for_task — AI-powered combo recommendation
|
||||
* 7. omniroute_explain_route — Post-hoc routing decision explainer
|
||||
* 8. omniroute_get_session_snapshot — Full session state snapshot
|
||||
* 3. omniroute_set_routing_strategy — Runtime strategy switch for combos
|
||||
* 4. omniroute_set_resilience_profile — Circuit breaker/retry profiles
|
||||
* 5. omniroute_test_combo — Live test each provider in a combo
|
||||
* 6. omniroute_get_provider_metrics — Detailed per-provider metrics
|
||||
* 7. omniroute_best_combo_for_task — AI-powered combo recommendation
|
||||
* 8. omniroute_explain_route — Post-hoc routing decision explainer
|
||||
* 9. omniroute_get_session_snapshot — Full session state snapshot
|
||||
* 10. omniroute_sync_pricing — Sync provider pricing from external source
|
||||
*/
|
||||
|
||||
import { logToolCall } from "../audit.ts";
|
||||
@@ -335,6 +337,108 @@ export async function handleSetBudgetGuard(args: {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * MCP handler for omniroute_set_routing_strategy.
 *
 * Looks up the combo in `GET /api/combos`, then issues `PUT /api/combos/{id}`
 * with the new strategy. When the strategy is "auto" and a sub-strategy is
 * given, the combo's existing config is re-sent with `auto.routingStrategy`
 * replaced. Every outcome is written to the audit log via `logToolCall`.
 *
 * @param args.comboId - Combo id or name. An exact id match takes precedence
 *                       over a name match.
 * @param args.strategy - Routing strategy to apply.
 * @param args.autoRoutingStrategy - Sub-strategy for auto mode; ignored for
 *                                   any other strategy.
 * @returns MCP tool content: the JSON-serialized result on success, or an
 *          `Error: …` text with `isError: true` on failure.
 */
export async function handleSetRoutingStrategy(args: {
  comboId: string;
  strategy:
    | "priority"
    | "weighted"
    | "round-robin"
    | "strict-random"
    | "random"
    | "least-used"
    | "cost-optimized"
    | "auto";
  autoRoutingStrategy?: "rules" | "cost" | "eco" | "latency" | "fast";
}) {
  const TOOL_NAME = "omniroute_set_routing_strategy";
  const start = Date.now();

  // Shared failure path: audit the error, then return the MCP error payload.
  const fail = async (msg: string) => {
    await logToolCall(TOOL_NAME, args, null, Date.now() - start, false, msg);
    return { content: [{ type: "text" as const, text: `Error: ${msg}` }], isError: true };
  };

  try {
    // A blank identifier must never match: presumably toString() yields "" for
    // a missing id/name, so "" would otherwise select an arbitrary combo that
    // lacks one of those fields.
    if (args.comboId === "") {
      return await fail(`Combo '${args.comboId}' not found`);
    }

    const combos = normalizeCombosResponse(await apiFetch("/api/combos"));

    // Prefer the exact id; only fall back to the name when no id matches, so a
    // combo whose name equals another combo's id cannot be updated by mistake.
    const combo =
      combos.find((comboEntry) => toString(comboEntry.id) === args.comboId) ??
      combos.find((comboEntry) => toString(comboEntry.name) === args.comboId);

    if (!combo) {
      return await fail(`Combo '${args.comboId}' not found`);
    }

    const comboId = toString(combo.id);
    if (!comboId) {
      return await fail("Matched combo has no id");
    }

    // The config may sit on the combo itself or under `data`; use the former
    // when it is non-empty.
    const comboData = toRecord(combo.data);
    const currentConfig = toRecord(
      Object.keys(toRecord(combo.config)).length > 0 ? combo.config : comboData.config
    );

    const payload: JsonRecord = { strategy: args.strategy };
    if (args.strategy === "auto" && args.autoRoutingStrategy) {
      // Re-send the existing config so only auto.routingStrategy changes.
      payload.config = {
        ...currentConfig,
        auto: {
          ...toRecord(currentConfig.auto),
          routingStrategy: args.autoRoutingStrategy,
        },
      };
    }

    const updatedCombo = toRecord(
      await apiFetch(`/api/combos/${encodeURIComponent(comboId)}`, {
        method: "PUT",
        body: JSON.stringify(payload),
      })
    );

    // Report what the API returned, falling back to the requested values.
    const updatedConfig = toRecord(updatedCombo.config);
    const effectiveStrategy = toString(updatedCombo.strategy, args.strategy);
    const resolvedAutoStrategy =
      toString(toRecord(updatedConfig.auto).routingStrategy) ||
      (args.strategy === "auto" ? (args.autoRoutingStrategy ?? "rules") : "");

    const result = {
      success: true,
      combo: {
        id: toString(updatedCombo.id, comboId),
        name: toString(updatedCombo.name, toString(combo.name, comboId)),
        strategy: effectiveStrategy,
        autoRoutingStrategy: effectiveStrategy === "auto" ? resolvedAutoStrategy : null,
      },
    };

    await logToolCall(TOOL_NAME, args, result, Date.now() - start, true);
    return { content: [{ type: "text" as const, text: JSON.stringify(result, null, 2) }] };
  } catch (err) {
    return fail(err instanceof Error ? err.message : String(err));
  }
}
|
||||
|
||||
export async function handleSetResilienceProfile(args: {
|
||||
profile: "aggressive" | "balanced" | "conservative";
|
||||
}) {
|
||||
|
||||
@@ -20,6 +20,7 @@ import {
|
||||
import { getTaskFitness } from "./taskFitness";
|
||||
import { getModePack } from "./modePacks";
|
||||
import { getSelfHealingManager } from "./selfHealing";
|
||||
import { classifyPromptIntent } from "../intentClassifier";
|
||||
|
||||
export interface AutoComboConfig {
|
||||
id: string;
|
||||
@@ -30,6 +31,8 @@ export interface AutoComboConfig {
|
||||
modePack?: string;
|
||||
budgetCap?: number; // max cost per request in USD
|
||||
explorationRate: number; // 0.05 = 5% exploratory
|
||||
/** If set, RouterStrategy name to use for selection ('rules' | 'cost' | 'latency') */
|
||||
routerStrategy?: string;
|
||||
}
|
||||
|
||||
export interface SelectionResult {
|
||||
@@ -43,14 +46,44 @@ export interface SelectionResult {
|
||||
|
||||
/**
|
||||
* Select the best provider from an auto-combo pool.
|
||||
*
|
||||
* @param config - AutoCombo configuration
|
||||
* @param candidates - Provider candidates to score
|
||||
* @param taskType - Task type hint. When "default" or omitted, the engine will attempt
|
||||
* to infer the intent from `promptMessages` using multilingual classification.
|
||||
* @param promptMessages - Optional raw messages for intent classification
|
||||
*/
|
||||
export function selectProvider(
|
||||
config: AutoComboConfig,
|
||||
candidates: ProviderCandidate[],
|
||||
taskType: string = "default"
|
||||
taskType: string = "default",
|
||||
promptMessages?: Array<{ role: string; content: unknown }>
|
||||
): SelectionResult {
|
||||
const healer = getSelfHealingManager();
|
||||
|
||||
// ── Intent classification (ClawRouter Feature #10/11) ────────────────────
|
||||
// When taskType is generic ('default'), attempt to classify the prompt intent
|
||||
// using the multilingual intentClassifier for better task fitness scoring.
|
||||
let effectiveTaskType = taskType;
|
||||
if ((taskType === "default" || taskType === "") && promptMessages?.length) {
|
||||
// Extract text from last user message for classification
|
||||
const lastUserMsg = [...promptMessages].reverse().find((m) => m.role === "user");
|
||||
if (lastUserMsg) {
|
||||
const text =
|
||||
typeof lastUserMsg.content === "string"
|
||||
? lastUserMsg.content
|
||||
: Array.isArray(lastUserMsg.content)
|
||||
? (lastUserMsg.content as Array<{ type: string; text?: string }>)
|
||||
.filter((b) => b.type === "text")
|
||||
.map((b) => b.text || "")
|
||||
.join(" ")
|
||||
: "";
|
||||
if (text.length > 10) {
|
||||
const intent = classifyPromptIntent(text);
|
||||
effectiveTaskType = intent; // 'code' | 'reasoning' | 'simple' | 'medium'
|
||||
}
|
||||
}
|
||||
}
|
||||
// Resolve weights from mode pack or config
|
||||
let weights = config.weights;
|
||||
if (config.modePack) {
|
||||
@@ -80,8 +113,8 @@ export function selectProvider(
|
||||
excluded.length = 0;
|
||||
}
|
||||
|
||||
// Score all providers
|
||||
const scored = scorePool(pool, taskType, weights, getTaskFitness);
|
||||
// Score all providers (using classified intent if available)
|
||||
const scored = scorePool(pool, effectiveTaskType, weights, getTaskFitness);
|
||||
|
||||
// Apply self-healing re-evaluation with actual scores
|
||||
const finalCandidates = scored.filter((s) => {
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* RouterStrategy — Pluggable Routing Strategy System
|
||||
*
|
||||
* Inspired by ClawRouter commit 14c83c258 "refactor: extract routing into pluggable RouterStrategy system".
|
||||
* Provides a RouterStrategy interface and three built-in implementations:
|
||||
* - RulesStrategy (default): wraps the existing 6-factor scoring engine
|
||||
* - CostStrategy: always picks cheapest available model
|
||||
* - LatencyStrategy: picks the lowest p95 latency, penalizing high error rates
|
||||
*/
|
||||
|
||||
import type { ProviderCandidate, ScoredProvider } from "./scoring.ts";
|
||||
import { scorePool } from "./scoring.ts";
|
||||
import { getTaskFitness } from "./taskFitness.ts";
|
||||
|
||||
/**
 * Per-request hints handed to `RouterStrategy.select`.
 *
 * Of the built-in strategies in this file only the rules strategy reads the
 * context, and it reads `taskType` alone; the optional fields are not
 * consulted here.
 */
export interface RoutingContext {
  /** Task type forwarded to the scoring engine by the rules strategy. */
  taskType: string;
  /** NOTE(review): presumably true when the request declares tools — confirm with callers. */
  requestHasTools?: boolean;
  /** NOTE(review): presumably true when the request carries image input — confirm with callers. */
  requestHasVision?: boolean;
  /** NOTE(review): presumably an input-size estimate in tokens — confirm with callers. */
  estimatedInputTokens?: number;
}
|
||||
|
||||
/** Outcome of a strategy's `select` call. */
export interface RoutingDecision {
  /** Provider of the winning candidate. */
  provider: string;
  /** Model of the winning candidate. */
  model: string;
  /** Name of the strategy that produced this decision. */
  strategy: string;
  /** Human-readable explanation of why the candidate won. */
  reason: string;
  /** Number of candidates the strategy ranked. */
  candidatesConsidered: number;
  /** Strategy-specific score of the winner; scales differ between strategies. */
  finalScore: number;
}
|
||||
|
||||
/** Contract implemented by every pluggable routing strategy. */
export interface RouterStrategy {
  /** Identifier reported in `RoutingDecision.strategy`. */
  readonly name: string;
  /** Short description returned by `listStrategies`. */
  readonly description: string;
  /**
   * Picks one candidate from `pool`.
   * The built-in strategies throw when the pool is empty.
   */
  select(pool: ProviderCandidate[], context: RoutingContext): RoutingDecision;
}
|
||||
|
||||
// ── RulesStrategy: wraps 6-factor scoring engine ────────────────────────────
|
||||
|
||||
/**
 * Default strategy: delegates ranking to `scorePool` (default weights) and
 * returns its top-ranked candidate.
 */
class RulesStrategyImpl implements RouterStrategy {
  readonly name = "rules";
  readonly description =
    "6-factor weighted scoring: quota, health, cost, latency, taskFit, stability";

  /**
   * Ranks every candidate whose circuit breaker is not OPEN; when that leaves
   * nothing, the full pool is ranked instead.
   *
   * @throws Error when scoring yields no candidate (e.g. empty pool).
   */
  select(pool: ProviderCandidate[], context: RoutingContext): RoutingDecision {
    const usable = pool.filter(({ circuitBreakerState }) => circuitBreakerState !== "OPEN");
    const toScore = usable.length > 0 ? usable : pool;

    // `undefined` weights: scorePool is called without explicit weights.
    const ranked: ScoredProvider[] = scorePool(
      toScore,
      context.taskType,
      undefined,
      getTaskFitness
    );

    const [winner] = ranked;
    if (!winner) throw new Error("[RulesStrategy] No candidates to score");

    const { score, factors } = winner;
    return {
      provider: winner.provider,
      model: winner.model,
      strategy: this.name,
      reason: `RulesStrategy: score=${score.toFixed(3)} (quota=${factors.quota.toFixed(2)}, health=${factors.health.toFixed(2)}, cost=${factors.costInv.toFixed(2)}, taskFit=${factors.taskFit.toFixed(2)})`,
      candidatesConsidered: ranked.length,
      finalScore: score,
    };
  }
}
|
||||
|
||||
// ── CostStrategy: always picks cheapest healthy provider ─────────────────────
|
||||
|
||||
/**
 * Cost-first strategy: picks the candidate with the lowest `costPer1MTokens`.
 */
class CostStrategyImpl implements RouterStrategy {
  readonly name = "cost";
  readonly description = "Always selects cheapest available provider (by costPer1MTokens)";

  /**
   * Chooses the cheapest candidate whose circuit breaker is not OPEN; when
   * every breaker is OPEN the full pool is considered instead. Ties keep the
   * candidate that appears first in the pool.
   *
   * @param pool - Candidates to choose from.
   * @param context - Unused by this strategy; present to satisfy the interface.
   * @returns The decision, with `finalScore = 1 / (1 + cost)`: 1.0 for a free
   *          provider and strictly decreasing toward 0 as the price grows.
   * @throws Error when there is no candidate at all.
   */
  select(pool: ProviderCandidate[], context: RoutingContext): RoutingDecision {
    const healthy = pool.filter((c) => c.circuitBreakerState !== "OPEN");
    const candidates = healthy.length > 0 ? healthy : pool;

    // Linear scan for the minimum; strict `<` keeps the earliest candidate on ties.
    let best: ProviderCandidate | undefined = candidates[0];
    for (const candidate of candidates) {
      if (best && candidate.costPer1MTokens < best.costPer1MTokens) best = candidate;
    }
    if (!best) throw new Error("[CostStrategy] No candidates available");

    // The previous `cost === 0 ? 1 : 1 / cost` scored a free provider (1.0)
    // below a $0.50 one (2.0) and was unbounded for very cheap providers.
    // 1 / (1 + cost) keeps the free case at 1.0 and is monotonic in cost.
    const finalScore = 1 / (1 + Math.max(0, best.costPer1MTokens));

    return {
      provider: best.provider,
      model: best.model,
      strategy: this.name,
      reason: `CostStrategy: cheapest at $${best.costPer1MTokens.toFixed(3)}/1M tokens`,
      candidatesConsidered: candidates.length,
      finalScore,
    };
  }
}
|
||||
|
||||
// ── LatencyStrategy: prioritize low latency + reliability ───────────────────
|
||||
|
||||
/**
 * Latency-first strategy: ranks candidates by p95 latency plus an error-rate
 * penalty and returns the lowest.
 */
class LatencyStrategyImpl implements RouterStrategy {
  readonly name = "latency";
  readonly description = "Prioritizes lowest p95 latency with reliability weighting";

  /**
   * Considers candidates whose circuit breaker is not OPEN, or the whole pool
   * when none qualifies.
   *
   * @throws Error when there is no candidate at all.
   */
  select(pool: ProviderCandidate[], context: RoutingContext): RoutingDecision {
    const usable = pool.filter(({ circuitBreakerState }) => circuitBreakerState !== "OPEN");
    const candidates = usable.length > 0 ? usable : pool;

    // Ranking key: p95 latency plus errorRate * 1000 as a penalty.
    const penalizedLatency = (c: ProviderCandidate): number => c.p95LatencyMs + c.errorRate * 1000;
    const [winner] = [...candidates].sort((a, b) => penalizedLatency(a) - penalizedLatency(b));
    if (!winner) throw new Error("[LatencyStrategy] No candidates available");

    // Score blends inverse latency (70%) with reliability (30%); a
    // non-positive p95 gets a latency score of 1.
    let latencyScore = 1;
    if (winner.p95LatencyMs > 0) {
      latencyScore = Math.max(0.001, 10_000 / winner.p95LatencyMs);
    }
    const reliability = Math.max(0, 1 - winner.errorRate);

    return {
      provider: winner.provider,
      model: winner.model,
      strategy: this.name,
      reason: `LatencyStrategy: p95=${winner.p95LatencyMs}ms, errorRate=${(winner.errorRate * 100).toFixed(2)}%`,
      candidatesConsidered: candidates.length,
      finalScore: latencyScore * 0.7 + reliability * 0.3,
    };
  }
}
|
||||
|
||||
// ── Registry ──────────────────────────────────────────────────────────────────
|
||||
|
||||
// One shared instance per built-in strategy; aliases point at the same instance.
const rulesStrategy = new RulesStrategyImpl();
const costStrategy = new CostStrategyImpl();
const latencyStrategy = new LatencyStrategyImpl();

// Name → strategy lookup. Insertion order is the order listStrategies() reports.
const strategyRegistry = new Map<string, RouterStrategy>([
  ["rules", rulesStrategy],
  ["cost", costStrategy],
  ["eco", costStrategy], // alias
  ["latency", latencyStrategy],
  ["fast", latencyStrategy], // alias
]);
|
||||
|
||||
/**
 * Looks up a registered strategy by name.
 *
 * @param name - Registry key (e.g. "rules", "cost", "latency" or an alias).
 * @returns The registered strategy; the rules strategy, after a console
 *          warning, when the name is unknown.
 */
export function getStrategy(name: string): RouterStrategy {
  const found = strategyRegistry.get(name);
  if (found) return found;

  console.warn(`[RouterStrategy] Strategy '${name}' not found, falling back to 'rules'`);
  return rulesStrategy;
}
|
||||
|
||||
/**
 * Adds a strategy to the registry under `name`, replacing any existing entry
 * (a console warning is emitted when an entry is replaced).
 *
 * @param name - Registry key to store the strategy under.
 * @param strategy - Strategy implementation to register.
 */
export function registerStrategy(name: string, strategy: RouterStrategy): void {
  const isOverwrite = strategyRegistry.has(name);
  if (isOverwrite) console.warn(`[RouterStrategy] Overwriting strategy '${name}'`);

  strategyRegistry.set(name, strategy);
}
|
||||
|
||||
/**
 * Lists every registry entry, aliases included, in registration order.
 *
 * @returns One `{ name, description }` pair per registry key; `name` is the
 *          key, `description` comes from the strategy stored under it.
 */
export function listStrategies(): Array<{ name: string; description: string }> {
  return Array.from(strategyRegistry, ([name, strategy]) => ({
    name,
    description: strategy.description,
  }));
}
|
||||
|
||||
/**
 * Convenience wrapper: resolves `strategyName` through `getStrategy` (unknown
 * names fall back to the rules strategy) and runs its `select`.
 *
 * @param pool - Candidates to choose from.
 * @param context - Request hints passed through to the strategy.
 * @param strategyName - Registry key; defaults to "rules".
 * @returns The decision produced by the resolved strategy.
 */
export function selectWithStrategy(
  pool: ProviderCandidate[],
  context: RoutingContext,
  strategyName = "rules"
): RoutingDecision {
  const strategy = getStrategy(strategyName);
  return strategy.select(pool, context);
}
|
||||
@@ -74,7 +74,8 @@ export function calculateScore(factors: ScoringFactors, weights: ScoringWeights)
|
||||
weights.costInv * factors.costInv +
|
||||
weights.latencyInv * factors.latencyInv +
|
||||
weights.taskFit * factors.taskFit +
|
||||
weights.stability * factors.stability
|
||||
weights.stability * factors.stability +
|
||||
weights.tierPriority * factors.tierPriority
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -24,10 +24,23 @@ const FITNESS_TABLE: Record<string, Record<string, number>> = {
|
||||
"deepseek-coder": 0.9,
|
||||
"deepseek-v3": 0.85,
|
||||
"deepseek-r1": 0.88,
|
||||
"deepseek-chat": 0.84, // DeepSeek V3.2 Chat — strong code performance
|
||||
"deepseek-v3.2": 0.86, // Explicit V3.2 alias
|
||||
qwen: 0.78,
|
||||
llama: 0.72,
|
||||
mistral: 0.75,
|
||||
mixtral: 0.77,
|
||||
// Grok-4 fast — good code, ultra-low latency (1143ms P50)
|
||||
"grok-4-fast": 0.8,
|
||||
"grok-4": 0.82,
|
||||
"grok-3": 0.8,
|
||||
// Kimi K2.5 — agentic with tool calling, good at code tasks
|
||||
"kimi-k2": 0.82,
|
||||
// GLM-5 — Z.AI model with 128k output
|
||||
"glm-5": 0.78,
|
||||
// MiniMax M2.5 — reasoning support helps complex code
|
||||
"minimax-m2.5": 0.75,
|
||||
"minimax-m2": 0.72,
|
||||
},
|
||||
review: {
|
||||
"claude-sonnet": 0.92,
|
||||
@@ -58,10 +71,15 @@ const FITNESS_TABLE: Record<string, Record<string, number>> = {
|
||||
"claude-sonnet": 0.92,
|
||||
"gemini-2.5-pro": 0.95,
|
||||
"gemini-pro": 0.88,
|
||||
"gemini-3.1-pro": 0.95, // Gemini 3.1 Pro — 1M context, ideal for long analysis
|
||||
"gpt-4o": 0.85,
|
||||
o1: 0.9,
|
||||
o3: 0.93,
|
||||
"deepseek-r1": 0.88,
|
||||
"deepseek-chat": 0.8,
|
||||
"kimi-k2": 0.82, // Kimi K2.5 agentic — good for analysis
|
||||
"glm-5": 0.78, // GLM-5 with 128k output for long analysis
|
||||
"minimax-m2.5": 0.76,
|
||||
},
|
||||
debugging: {
|
||||
"claude-sonnet": 0.93,
|
||||
@@ -87,8 +105,17 @@ const FITNESS_TABLE: Record<string, Record<string, number>> = {
|
||||
"claude-opus": 0.85,
|
||||
"gpt-4o": 0.85,
|
||||
"gemini-pro": 0.8,
|
||||
"gemini-3.1-pro": 0.85,
|
||||
"deepseek-v3": 0.75,
|
||||
"deepseek-chat": 0.74,
|
||||
"gemini-flash": 0.72,
|
||||
// New models from ClawRouter analysis (2026-03-17):
|
||||
"grok-4-fast": 0.72, // ultra-fast, suitable for all tasks
|
||||
"grok-4": 0.74,
|
||||
"grok-3": 0.73,
|
||||
"kimi-k2": 0.76, // agentic multi-step tasks
|
||||
"glm-5": 0.7,
|
||||
"minimax-m2.5": 0.7,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -5,18 +5,37 @@
|
||||
|
||||
import { checkFallbackError, formatRetryAfter, getProviderProfile } from "./accountFallback.ts";
|
||||
import { unavailableResponse } from "../utils/error.ts";
|
||||
import { recordComboRequest, getComboMetrics } from "./comboMetrics.ts";
|
||||
import { recordComboIntent, recordComboRequest, getComboMetrics } from "./comboMetrics.ts";
|
||||
import { resolveComboConfig, getDefaultComboConfig } from "./comboConfig.ts";
|
||||
import * as semaphore from "./rateLimitSemaphore.ts";
|
||||
import { getCircuitBreaker } from "../../src/shared/utils/circuitBreaker";
|
||||
import { fisherYatesShuffle, getNextFromDeck } from "../../src/shared/utils/shuffleDeck";
|
||||
import { parseModel } from "./model.ts";
|
||||
import { applyComboAgentMiddleware, injectModelTag } from "./comboAgentMiddleware.ts";
|
||||
import { classifyWithConfig, DEFAULT_INTENT_CONFIG } from "./intentClassifier.ts";
|
||||
import { selectProvider as selectAutoProvider } from "./autoCombo/engine.ts";
|
||||
import { selectWithStrategy } from "./autoCombo/routerStrategy.ts";
|
||||
import { DEFAULT_WEIGHTS, scorePool } from "./autoCombo/scoring.ts";
|
||||
import { supportsToolCalling } from "./modelCapabilities.ts";
|
||||
|
||||
// Status codes that should mark semaphore + record circuit breaker failures
|
||||
const TRANSIENT_FOR_BREAKER = [429, 502, 503, 504];
|
||||
|
||||
const MAX_COMBO_DEPTH = 3;
|
||||
|
||||
// Bootstrap defaults from ClawRouter benchmark (used when no local latency history exists yet)
|
||||
const DEFAULT_MODEL_P95_MS = {
|
||||
"grok-4-fast-non-reasoning": 1143,
|
||||
"grok-4-1-fast-non-reasoning": 1244,
|
||||
"gemini-2.5-flash": 1238,
|
||||
"kimi-k2.5": 1646,
|
||||
"gpt-4o-mini": 2764,
|
||||
"claude-sonnet-4.6": 4000,
|
||||
"claude-opus-4.6": 6000,
|
||||
"deepseek-chat": 2000,
|
||||
};
|
||||
const MIN_HISTORY_SAMPLES = 10;
|
||||
|
||||
// In-memory atomic counter per combo for round-robin distribution
|
||||
// Resets on server restart (by design — no stale state)
|
||||
const rrCounters = new Map();
|
||||
@@ -201,6 +220,193 @@ function sortModelsByUsage(models, comboName) {
|
||||
return withUsage.map((e) => e.modelStr);
|
||||
}
|
||||
|
||||
/**
 * Flatten a chat message `content` value into plain text.
 *
 * - A string is returned as-is.
 * - An array contributes one line per element: the element's `text` when the
 *   element is an object with a string `text`, otherwise an empty line.
 * - Anything else yields "".
 *
 * @param {unknown} content - Message content (string, array of parts, or other).
 * @returns {string} The text, with array parts joined by "\n".
 */
function toTextContent(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces = [];
  for (const part of content) {
    const isTextPart = Boolean(part) && typeof part === "object" && typeof part.text === "string";
    // Non-text parts still occupy a (blank) line so positions stay aligned.
    pieces.push(isTextPart ? part.text : "");
  }
  return pieces.join("\n");
}
|
||||
|
||||
/**
 * Pull the text used for intent classification out of a request body.
 *
 * Lookup order:
 *   1. the most recent `messages` entry whose role is "user",
 *   2. `input` as a string, or as an array of items with string `content`/`text`,
 *   3. `prompt` as a string.
 *
 * @param {unknown} body - Incoming request body.
 * @returns {string} The extracted prompt, or "" when nothing usable is found.
 */
function extractPromptForIntent(body) {
  if (!body || typeof body !== "object") return "";

  // Walk the conversation backwards so the newest user turn wins.
  if (Array.isArray(body.messages)) {
    for (let idx = body.messages.length - 1; idx >= 0; idx -= 1) {
      const candidate = body.messages[idx];
      if (candidate && typeof candidate === "object" && candidate.role === "user") {
        return toTextContent(candidate.content);
      }
    }
  }

  const input = body.input;
  if (typeof input === "string") return input;

  if (Array.isArray(input)) {
    const lines = [];
    for (const item of input) {
      if (!item || typeof item !== "object") continue;
      // `content` takes precedence over `text` whenever it is a string (even an empty one).
      let piece = "";
      if (typeof item.content === "string") {
        piece = item.content;
      } else if (typeof item.text === "string") {
        piece = item.text;
      }
      if (piece) lines.push(piece);
    }
    const joined = lines.join("\n");
    if (joined) return joined;
  }

  return typeof body.prompt === "string" ? body.prompt : "";
}
|
||||
|
||||
/**
 * Translate a classifier intent into the task type used for scoring.
 *
 * "code" maps to "coding" and "reasoning" to "analysis"; every other intent
 * (including "simple" and "medium") maps to "default".
 *
 * @param {string} intent - Intent label produced by the classifier.
 * @returns {string} Task type name.
 */
function mapIntentToTaskType(intent) {
  if (intent === "code") return "coding";
  if (intent === "reasoning") return "analysis";
  return "default";
}
|
||||
|
||||
/**
 * Normalise a setting into a list of trimmed, non-empty strings.
 *
 * Accepts either an array (non-string entries are dropped) or a
 * comma-separated string. Any other input produces an empty list.
 *
 * @param {unknown} input - Array or comma-separated string.
 * @returns {string[]} Trimmed, non-empty entries in their original order.
 */
function toStringArray(input) {
  let rawItems;
  if (Array.isArray(input)) {
    rawItems = input.filter((entry) => typeof entry === "string");
  } else if (typeof input === "string") {
    rawItems = input.split(",");
  } else {
    return [];
  }

  const cleaned = [];
  for (const entry of rawItems) {
    const trimmed = entry.trim();
    if (trimmed) cleaned.push(trimmed);
  }
  return cleaned;
}
|
||||
|
||||
/**
 * Build the effective intent-classifier configuration for a combo.
 *
 * Layers, later ones winning:
 *   1. DEFAULT_INTENT_CONFIG
 *   2. the combo's own intent config (`autoConfig.intentConfig`,
 *      `config.auto.intentConfig` or `config.intentConfig`, first truthy one)
 *   3. global settings overrides (`intentDetectionEnabled`,
 *      `intentSimpleMaxWords`, `intentExtra*Keywords`)
 *
 * A settings override is applied only when the value is actually set:
 * `intentSimpleMaxWords` must be a finite number or a non-blank numeric
 * string. `null`, "" and booleans are ignored, because `Number()` would
 * otherwise coerce them to 0/1 and silently clobber the configured default.
 *
 * @param {object | null | undefined} settings - Global settings object.
 * @param {object | null | undefined} combo - Combo definition.
 * @returns {object} Merged intent configuration.
 */
function getIntentConfig(settings, combo) {
  const comboIntentConfig =
    combo?.autoConfig?.intentConfig ||
    combo?.config?.auto?.intentConfig ||
    combo?.config?.intentConfig ||
    {};

  const resolved = { ...DEFAULT_INTENT_CONFIG, ...comboIntentConfig };

  if (typeof settings?.intentDetectionEnabled === "boolean") {
    resolved.enabled = settings.intentDetectionEnabled;
  }

  const rawMaxWords = settings?.intentSimpleMaxWords;
  const maxWordsProvided =
    typeof rawMaxWords === "number" ||
    (typeof rawMaxWords === "string" && rawMaxWords.trim() !== "");
  if (maxWordsProvided) {
    const maxWords = Number(rawMaxWords);
    if (Number.isFinite(maxWords)) resolved.simpleMaxWords = maxWords;
  }

  // Keyword lists: parse each setting once and only override when non-empty.
  const keywordOverrides = [
    ["extraCodeKeywords", settings?.intentExtraCodeKeywords],
    ["extraReasoningKeywords", settings?.intentExtraReasoningKeywords],
    ["extraSimpleKeywords", settings?.intentExtraSimpleKeywords],
  ];
  for (const [key, raw] of keywordOverrides) {
    const keywords = toStringArray(raw);
    if (keywords.length > 0) resolved[key] = keywords;
  }

  return resolved;
}
|
||||
|
||||
/**
 * Look up the bootstrap latency for a model in DEFAULT_MODEL_P95_MS.
 *
 * The model id is stringified and lower-cased before the lookup; models
 * without an entry get 1500.
 *
 * @param {unknown} modelId - Model identifier (may be empty or undefined).
 * @returns {number} Bootstrap latency value from the table, or 1500.
 */
function getBootstrapLatencyMs(modelId) {
  const lookupKey = String(modelId || "").toLowerCase();
  const known = DEFAULT_MODEL_P95_MS[lookupKey];
  return known ?? 1500;
}
|
||||
|
||||
/**
 * Build the candidate descriptors consumed by the auto-combo selectors.
 *
 * For every model string this gathers:
 *   - pricing (`input` price from `getPricingForModel`, default 1),
 *   - latency and error signals, preferring 24h history from
 *     `getModelLatencyStats` once a model has at least MIN_HISTORY_SAMPLES
 *     requests, otherwise the in-memory combo metrics, otherwise bootstrap
 *     defaults,
 *   - the state of the `combo:<modelStr>` circuit breaker.
 *
 * Quota fields, account tier and reset interval are fixed placeholder values.
 *
 * @param {string[]} modelStrings - Model strings of the combo.
 * @param {string} comboName - Combo name used to read runtime metrics.
 * @returns {Promise<object[]>} One candidate per model string, in input order.
 */
async function buildAutoCandidates(modelStrings, comboName) {
  const comboMetrics = getComboMetrics(comboName);
  const { getPricingForModel } = await import("../../src/lib/localDb");

  let latencyHistory = {};
  try {
    const { getModelLatencyStats } = await import("../../src/lib/usageDb");
    latencyHistory = await getModelLatencyStats({
      windowHours: 24,
      minSamples: 3,
      maxRows: 10000,
    });
  } catch {
    // keep empty stats — auto-combo will use runtime + bootstrap signals
  }

  const isPositiveNumber = (value) => Number.isFinite(value) && value > 0;

  const describeCandidate = async (modelStr) => {
    const parsed = parseModel(modelStr);
    const provider = parsed.provider || parsed.providerAlias || "unknown";
    const model = parsed.model || modelStr;

    const history = latencyHistory[`${provider}/${model}`] || null;
    const historyRequests = Number(history?.totalRequests);
    const hasHistory = Number.isFinite(historyRequests) && historyRequests >= MIN_HISTORY_SAMPLES;

    let costPer1MTokens = 1;
    try {
      const pricing = await getPricingForModel(provider, model);
      const inputPrice = Number(pricing?.input);
      if (Number.isFinite(inputPrice) && inputPrice >= 0) {
        costPer1MTokens = inputPrice;
      }
    } catch {
      // keep default cost
    }

    const runtime = comboMetrics?.byModel?.[modelStr] || null;
    const runtimeAvgLatency = Number(runtime?.avgLatencyMs);
    const runtimeSuccessPct = Number(runtime?.successRate);
    const historyP95 = Number(history?.p95LatencyMs);
    const historyStdDev = Number(history?.latencyStdDev);
    const historySuccess = Number(history?.successRate); // 0..1

    // Latency: history p95 when trusted, else the runtime average, else bootstrap.
    const observedLatency = hasHistory ? historyP95 : runtimeAvgLatency;
    const p95LatencyMs = isPositiveNumber(observedLatency)
      ? observedLatency
      : getBootstrapLatencyMs(model);

    // Error rate: history is a 0..1 fraction, runtime metrics are a 0..100 percentage.
    let errorRate = 0.05;
    if (hasHistory) {
      if (Number.isFinite(historySuccess) && historySuccess >= 0 && historySuccess <= 1) {
        errorRate = 1 - historySuccess;
      }
    } else if (
      Number.isFinite(runtimeSuccessPct) &&
      runtimeSuccessPct >= 0 &&
      runtimeSuccessPct <= 100
    ) {
      errorRate = 1 - runtimeSuccessPct / 100;
    }

    // Std-dev: trusted history value, else 10% of the latency; never below 10.
    const stdDevBasis =
      hasHistory && isPositiveNumber(historyStdDev) ? historyStdDev : p95LatencyMs * 0.1;
    const latencyStdDev = Math.max(10, stdDevBasis);

    const rawBreakerState = getCircuitBreaker(`combo:${modelStr}`)?.getStatus?.()?.state;
    let circuitBreakerState = "CLOSED";
    if (rawBreakerState === "OPEN" || rawBreakerState === "HALF_OPEN") {
      circuitBreakerState = rawBreakerState;
    }

    return {
      provider,
      model,
      quotaRemaining: 100,
      quotaTotal: 100,
      circuitBreakerState,
      costPer1MTokens,
      p95LatencyMs,
      latencyStdDev,
      errorRate,
      accountTier: "standard",
      quotaResetIntervalSecs: 86400,
    };
  };

  return Promise.all(modelStrings.map((modelStr) => describeCandidate(modelStr)));
}
|
||||
|
||||
/**
|
||||
* Handle combo chat with fallback
|
||||
* Supports the combo strategies: priority, weighted, round-robin, random, strict-random, least-used, cost-optimized, auto
|
||||
@@ -225,12 +431,49 @@ export async function handleComboChat({
|
||||
const strategy = combo.strategy || "priority";
|
||||
const models = combo.models || [];
|
||||
|
||||
// ── Combo Agent Middleware (#399 + #401) ────────────────────────────────
|
||||
// Apply system_message override, tool_filter_regex, and extract pinned model
|
||||
// from context caching tag. These are all opt-in per combo config.
|
||||
const { body: agentBody, pinnedModel } = applyComboAgentMiddleware(
|
||||
body,
|
||||
combo,
|
||||
"" // provider/model not yet known — resolved per-model in loop
|
||||
);
|
||||
body = agentBody;
|
||||
if (pinnedModel) {
|
||||
log.info("COMBO", `[#401] Context caching: pinned model=${pinnedModel}`);
|
||||
}
|
||||
// Wrap handleSingleModel to inject context caching tag on response (#401)
|
||||
const handleSingleModelWrapped = combo.context_cache_protection
|
||||
? async (b, modelStr) => {
|
||||
const res = await handleSingleModel(b, modelStr);
|
||||
// Inject tag only on success and only for non-streaming non-binary responses
|
||||
if (res.ok && !b.stream) {
|
||||
try {
|
||||
const json = await res.clone().json();
|
||||
const msgs = Array.isArray(json?.messages) ? json.messages : [];
|
||||
if (msgs.length > 0) {
|
||||
const tagged = injectModelTag(msgs, modelStr);
|
||||
return new Response(JSON.stringify({ ...json, messages: tagged }), {
|
||||
status: res.status,
|
||||
headers: res.headers,
|
||||
});
|
||||
}
|
||||
} catch {
|
||||
/* non-JSON or stream — skip tagging */
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
: handleSingleModel;
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Route to round-robin handler if strategy matches
|
||||
if (strategy === "round-robin") {
|
||||
return handleRoundRobinCombo({
|
||||
body,
|
||||
combo,
|
||||
handleSingleModel,
|
||||
handleSingleModel: handleSingleModelWrapped,
|
||||
isModelAvailable,
|
||||
log,
|
||||
settings,
|
||||
@@ -278,7 +521,131 @@ export async function handleComboChat({
|
||||
}
|
||||
|
||||
// Apply strategy-specific ordering
|
||||
if (strategy === "strict-random") {
|
||||
if (strategy === "auto") {
|
||||
const requestHasTools = Array.isArray(body?.tools) && body.tools.length > 0;
|
||||
let eligibleModels = [...orderedModels];
|
||||
|
||||
if (requestHasTools) {
|
||||
const filtered = eligibleModels.filter((m) => supportsToolCalling(m));
|
||||
if (filtered.length > 0) {
|
||||
eligibleModels = filtered;
|
||||
} else {
|
||||
log.warn(
|
||||
"COMBO",
|
||||
"Auto strategy: all candidates filtered by tool-calling policy, falling back to full pool"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const prompt = extractPromptForIntent(body);
|
||||
const systemPrompt =
|
||||
typeof combo?.system_message === "string" ? combo.system_message : undefined;
|
||||
const intentConfig = getIntentConfig(settings, combo);
|
||||
const intent = classifyWithConfig(prompt, intentConfig, systemPrompt);
|
||||
recordComboIntent(combo.name, intent);
|
||||
const taskType = mapIntentToTaskType(intent);
|
||||
|
||||
const autoConfigSource = combo?.autoConfig || combo?.config?.auto || combo?.config || {};
|
||||
const routingStrategy =
|
||||
typeof autoConfigSource.routingStrategy === "string"
|
||||
? autoConfigSource.routingStrategy
|
||||
: typeof autoConfigSource.strategyName === "string"
|
||||
? autoConfigSource.strategyName
|
||||
: "rules";
|
||||
|
||||
const candidatePool = Array.isArray(autoConfigSource.candidatePool)
|
||||
? autoConfigSource.candidatePool
|
||||
: [
|
||||
...new Set(
|
||||
eligibleModels.map((m) => {
|
||||
const parsed = parseModel(m);
|
||||
return parsed.provider || parsed.providerAlias || "unknown";
|
||||
})
|
||||
),
|
||||
];
|
||||
|
||||
const weights =
|
||||
autoConfigSource.weights && typeof autoConfigSource.weights === "object"
|
||||
? autoConfigSource.weights
|
||||
: DEFAULT_WEIGHTS;
|
||||
const explorationRate = Number.isFinite(Number(autoConfigSource.explorationRate))
|
||||
? Number(autoConfigSource.explorationRate)
|
||||
: 0.05;
|
||||
const budgetCap = Number.isFinite(Number(autoConfigSource.budgetCap))
|
||||
? Number(autoConfigSource.budgetCap)
|
||||
: undefined;
|
||||
const modePack =
|
||||
typeof autoConfigSource.modePack === "string" ? autoConfigSource.modePack : undefined;
|
||||
|
||||
const candidates = await buildAutoCandidates(eligibleModels, combo.name);
|
||||
if (candidates.length > 0) {
|
||||
let selectedProvider = null;
|
||||
let selectedModel = null;
|
||||
let selectionReason = "";
|
||||
|
||||
if (routingStrategy !== "rules") {
|
||||
try {
|
||||
const decision = selectWithStrategy(
|
||||
candidates,
|
||||
{ taskType, requestHasTools },
|
||||
routingStrategy
|
||||
);
|
||||
selectedProvider = decision.provider;
|
||||
selectedModel = decision.model;
|
||||
selectionReason = decision.reason;
|
||||
} catch (err) {
|
||||
log.warn(
|
||||
"COMBO",
|
||||
`Auto strategy '${routingStrategy}' failed (${err?.message || "unknown"}), falling back to rules`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (!selectedProvider || !selectedModel) {
|
||||
const selection = selectAutoProvider(
|
||||
{
|
||||
id: combo.id || combo.name,
|
||||
name: combo.name,
|
||||
type: "auto",
|
||||
candidatePool,
|
||||
weights,
|
||||
modePack,
|
||||
budgetCap,
|
||||
explorationRate,
|
||||
},
|
||||
candidates,
|
||||
taskType
|
||||
);
|
||||
selectedProvider = selection.provider;
|
||||
selectedModel = selection.model;
|
||||
selectionReason = `score=${selection.score.toFixed(3)}${selection.isExploration ? " (exploration)" : ""}`;
|
||||
}
|
||||
|
||||
const modelLookup = new Map();
|
||||
for (const modelStr of eligibleModels) {
|
||||
const parsed = parseModel(modelStr);
|
||||
const provider = parsed.provider || parsed.providerAlias || "unknown";
|
||||
const modelId = parsed.model || modelStr;
|
||||
modelLookup.set(`${provider}/${modelId}`, modelStr);
|
||||
}
|
||||
|
||||
const ranked = scorePool(candidates, taskType, weights)
|
||||
.map((r) => modelLookup.get(`${r.provider}/${r.model}`) || `${r.provider}/${r.model}`)
|
||||
.filter(Boolean);
|
||||
|
||||
const selectedModelStr =
|
||||
modelLookup.get(`${selectedProvider}/${selectedModel}`) ||
|
||||
`${selectedProvider}/${selectedModel}`;
|
||||
orderedModels = [...new Set([selectedModelStr, ...ranked, ...eligibleModels])];
|
||||
|
||||
log.info(
|
||||
"COMBO",
|
||||
`Auto selection: ${selectedModelStr} | intent=${intent} task=${taskType} | strategy=${routingStrategy} | ${selectionReason}`
|
||||
);
|
||||
} else {
|
||||
log.warn("COMBO", "Auto strategy has no candidates, keeping default ordering");
|
||||
}
|
||||
} else if (strategy === "strict-random") {
|
||||
const selectedId = await getNextFromDeck(`combo:${combo.name}`, orderedModels);
|
||||
// Put selected model first so the fallback loop tries it first
|
||||
const rest = orderedModels.filter((m) => m !== selectedId);
|
||||
@@ -348,7 +715,7 @@ export async function handleComboChat({
|
||||
`Trying model ${i + 1}/${orderedModels.length}: ${modelStr}${retry > 0 ? ` (retry ${retry})` : ""}`
|
||||
);
|
||||
|
||||
const result = await handleSingleModel(body, modelStr);
|
||||
const result = await handleSingleModelWrapped(body, modelStr);
|
||||
|
||||
// Success — return response
|
||||
if (result.ok) {
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
/**
|
||||
* comboAgentMiddleware.ts — Combo Agent Features
|
||||
*
|
||||
* Implements the "combo as agent" features from issues #399 and #401:
|
||||
*
|
||||
* 1. **System Message Override** (#399): If the combo defines a `system_message`,
|
||||
* it is injected as the first system message, replacing any existing system message.
|
||||
*
|
||||
* 2. **Tool Filter Regex** (#399): If the combo defines a `tool_filter_regex`,
|
||||
* only tools whose name matches the pattern are forwarded to the provider.
|
||||
*
|
||||
* 3. **Context Caching Protection** (#401): If the combo enables
|
||||
* `context_cache_protection`, the proxy:
|
||||
* a. On response: injects `<omniModel>provider/model</omniModel>` tag into
|
||||
* the last assistant message content string.
|
||||
* b. On request: scans the message history for the tag, and if found,
|
||||
* overrides the requested model with the pinned one.
|
||||
*
|
||||
* All features are opt-in per combo and backward compatible with existing setups.
|
||||
*/
|
||||
|
||||
interface ComboConfig {
|
||||
system_message?: string | null;
|
||||
tool_filter_regex?: string | null;
|
||||
context_cache_protection?: number | boolean;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
interface Message {
|
||||
role?: string;
|
||||
content?: unknown;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
// ── Context Caching Tag ─────────────────────────────────────────────────────
|
||||
|
||||
const CACHE_TAG_PATTERN = /<omniModel>([^<]+)<\/omniModel>/;

/**
 * Inject the model tag into the last assistant message.
 *
 * Every existing `<omniModel>` tag is first stripped from all assistant
 * messages with string content (and trailing whitespace trimmed), so a
 * message never ends up carrying more than one tag. The tag is then appended
 * to the last assistant message.
 *
 * Only string content is modified — array content is left untouched to avoid
 * breaking Claude/Gemini multi-part message formats. If there is no assistant
 * message, or the last assistant message has non-string content, the cleaned
 * messages are returned without a new tag.
 *
 * @param messages - Conversation messages (not mutated).
 * @param providerModel - "provider/model" string to embed in the tag.
 * @returns A new array with the tag applied where possible.
 */
export function injectModelTag(messages: Message[], providerModel: string): Message[] {
  // Global variant used only for stripping: a non-global pattern would remove
  // just the first tag in a message and leave duplicates behind.
  const allTags = new RegExp(CACHE_TAG_PATTERN.source, "g");

  const cleaned = messages.map((msg) => {
    if (msg.role === "assistant" && typeof msg.content === "string") {
      return { ...msg, content: msg.content.replace(allTags, "").trimEnd() };
    }
    return msg;
  });

  // Locate the last assistant message; bail out when it cannot be tagged.
  const lastAssistantIdx = cleaned.map((m) => m.role).lastIndexOf("assistant");
  if (lastAssistantIdx === -1) return cleaned;

  const msg = cleaned[lastAssistantIdx];
  if (typeof msg.content !== "string") return cleaned;

  const tagged = [...cleaned];
  tagged[lastAssistantIdx] = {
    ...msg,
    content: `${msg.content}\n<omniModel>${providerModel}</omniModel>`,
  };
  return tagged;
}
|
||||
|
||||
/**
 * Find the model pinned by an earlier response.
 *
 * Walks the history from the newest message to the oldest and returns the
 * value captured by CACHE_TAG_PATTERN in the first assistant message with
 * string content that carries a tag.
 *
 * @param messages - Conversation history.
 * @returns The pinned "provider/model" string, or null when no tag exists.
 */
export function extractPinnedModel(messages: Message[]): string | null {
  let idx = messages.length;
  while (idx-- > 0) {
    const { role, content } = messages[idx];
    if (role !== "assistant" || typeof content !== "string") continue;

    const found = content.match(CACHE_TAG_PATTERN);
    if (found) return found[1];
  }
  return null;
}
|
||||
|
||||
// ── System Message Override ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Force the combo's system message onto a conversation.
 *
 * All messages whose role is "system" are dropped and a single system message
 * carrying `systemMessage` is placed at the front. The input array is not
 * mutated.
 *
 * @param messages - Original conversation messages.
 * @param systemMessage - System prompt defined by the combo.
 * @returns A new array: the override followed by the non-system messages.
 */
export function applySystemMessageOverride(messages: Message[], systemMessage: string): Message[] {
  const result: Message[] = [{ role: "system", content: systemMessage }];
  messages.forEach((message) => {
    if (message.role !== "system") result.push(message);
  });
  return result;
}
|
||||
|
||||
// ── Tool Filter Regex ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Keep only the tools whose name matches a regular expression.
 *
 * The name is read from `tool.function.name` (OpenAI format) and falls back
 * to `tool.name` (Anthropic format), then to "". When `tools` or `pattern` is
 * missing/empty, or the pattern is not a valid regular expression, the
 * original `tools` value is returned unchanged (an invalid pattern also logs
 * a warning).
 *
 * @param tools - Tool definitions from the request, if any.
 * @param pattern - Regular expression source used to select tools.
 * @returns The filtered tools, or the original value when no filter applies.
 */
export function applyToolFilter(
  tools: unknown[] | undefined,
  pattern: string | null | undefined
): unknown[] | undefined {
  if (!tools || !pattern) return tools;

  let matcher: RegExp;
  try {
    matcher = new RegExp(pattern);
  } catch {
    // A broken pattern must not take the request down — pass tools through.
    console.warn(`[ComboAgent] Invalid tool_filter_regex: "${pattern}"`);
    return tools;
  }

  const toolName = (tool: unknown): string => {
    const record = tool as Record<string, unknown>;
    const fn = record.function as Record<string, unknown> | undefined;
    return String(fn?.name ?? record.name ?? "");
  };

  return tools.filter((tool) => matcher.test(toolName(tool)));
}
|
||||
|
||||
// ── Main Middleware ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Apply all combo agent features to the request body.
 *
 * Steps, each opt-in through the combo config:
 *   1. `context_cache_protection` — look for a pinned model tag in the history.
 *   2. `system_message` — replace existing system messages with the combo's.
 *   3. `tool_filter_regex` — drop tools whose name does not match.
 *
 * The input body is never mutated. `messages` is only written to the result
 * when the request already carried a `messages` array or a system message
 * override was applied, so bodies that use `input`/`prompt` instead of
 * `messages` do not gain a spurious `messages: []`. `tools` is only written
 * when the filter produced a different array.
 *
 * @param body - Incoming request body.
 * @param comboConfig - Combo definition; null/undefined returns the body unchanged.
 * @param providerModel - "provider/model" string for context caching (currently unused here).
 * @returns The adjusted body plus the pinned model found in the history, if any.
 */
export function applyComboAgentMiddleware(
  body: Record<string, unknown>,
  comboConfig: ComboConfig | null | undefined,
  providerModel: string // "provider/model" string for context caching
): { body: Record<string, unknown>; pinnedModel: string | null } {
  if (!comboConfig) return { body, pinnedModel: null };

  const hadMessages = Array.isArray(body.messages);
  let messages: Message[] = hadMessages ? [...(body.messages as Message[])] : [];

  // 1. Context caching: the caller decides what to do with a pinned model.
  const pinnedModel: string | null = comboConfig.context_cache_protection
    ? extractPinnedModel(messages)
    : null;

  // 2. System message override (ignored when blank or not a string).
  const systemMessage = comboConfig.system_message;
  const overrideSystem = typeof systemMessage === "string" && systemMessage.trim() !== "";
  if (overrideSystem) {
    messages = applySystemMessageOverride(messages, systemMessage as string);
  }

  // 3. Tool filter
  const filteredTools = applyToolFilter(
    body.tools as unknown[] | undefined,
    comboConfig.tool_filter_regex
  );

  const nextBody: Record<string, unknown> = { ...body };
  if (hadMessages || overrideSystem) nextBody.messages = messages;
  if (filteredTools !== body.tools) nextBody.tools = filteredTools;

  return { body: nextBody, pinnedModel };
}
|
||||
@@ -21,6 +21,7 @@ interface ComboMetricsEntry {
|
||||
totalLatencyMs: number;
|
||||
strategy: string;
|
||||
lastUsedAt: string | null;
|
||||
intentCounts: Record<string, number>;
|
||||
byModel: Record<string, ModelMetrics>;
|
||||
}
|
||||
|
||||
@@ -69,6 +70,7 @@ export function recordComboRequest(
|
||||
totalLatencyMs: 0,
|
||||
strategy,
|
||||
lastUsedAt: null,
|
||||
intentCounts: {},
|
||||
byModel: {},
|
||||
});
|
||||
}
|
||||
@@ -131,6 +133,7 @@ export function getComboMetrics(comboName: string): ComboMetricsView | null {
|
||||
combo.totalRequests > 0 ? Math.round((combo.totalSuccesses / combo.totalRequests) * 100) : 0,
|
||||
fallbackRate:
|
||||
combo.totalRequests > 0 ? Math.round((combo.totalFallbacks / combo.totalRequests) * 100) : 0,
|
||||
intentCounts: { ...combo.intentCounts },
|
||||
byModel: Object.fromEntries(
|
||||
Object.entries(combo.byModel).map(([model, m]) => [
|
||||
model,
|
||||
@@ -156,6 +159,30 @@ export function getAllComboMetrics(): Record<string, ComboMetricsView | null> {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Count one occurrence of a detected prompt intent for a combo.
 *
 * If the combo has no metrics entry yet, a zeroed entry is created first
 * (with strategy "priority"). A falsy intent is counted under "unknown".
 *
 * @param comboName - Name of the combo the request was routed through.
 * @param intent - Intent label detected for the prompt.
 */
export function recordComboIntent(comboName: string, intent: string): void {
  if (!metrics.has(comboName)) {
    const fresh = {
      totalRequests: 0,
      totalSuccesses: 0,
      totalFailures: 0,
      totalFallbacks: 0,
      totalLatencyMs: 0,
      strategy: "priority",
      lastUsedAt: null,
      intentCounts: {},
      byModel: {},
    };
    metrics.set(comboName, fresh);
  }

  const entry = metrics.get(comboName);
  if (!entry) return;

  const bucket = String(intent || "unknown");
  const previous = entry.intentCounts[bucket] || 0;
  entry.intentCounts[bucket] = previous + 1;
}
|
||||
|
||||
/**
|
||||
* Reset metrics for a specific combo
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
/**
|
||||
* Emergency Fallback — Budget Exhaustion Redirect
|
||||
*
|
||||
* When a request fails due to budget exhaustion (HTTP 402 or budget keywords
|
||||
* in the error body), optionally redirect to a free-tier model
|
||||
* (default provider/model: nvidia + openai/gpt-oss-120b at $0.00/M tokens).
|
||||
*
|
||||
* Inspired by ClawRouter: "gpt-oss-120b costs nothing and serves as
|
||||
* automatic fallback when wallet is empty."
|
||||
*/
|
||||
|
||||
export interface EmergencyFallbackConfig {
|
||||
enabled: boolean;
|
||||
provider: string;
|
||||
model: string;
|
||||
triggerOn402: boolean;
|
||||
triggerOnBudgetKeywords: boolean;
|
||||
budgetKeywords: string[];
|
||||
/** Skip fallback for tool requests (gpt-oss-120b may not support structured tool calling) */
|
||||
skipForToolRequests: boolean;
|
||||
maxOutputTokens: number;
|
||||
}
|
||||
|
||||
export const EMERGENCY_FALLBACK_CONFIG: EmergencyFallbackConfig = {
|
||||
enabled: true,
|
||||
provider: "nvidia",
|
||||
model: "openai/gpt-oss-120b",
|
||||
triggerOn402: true,
|
||||
triggerOnBudgetKeywords: true,
|
||||
budgetKeywords: [
|
||||
"insufficient funds",
|
||||
"insufficient_funds",
|
||||
"budget exceeded",
|
||||
"budget_exceeded",
|
||||
"quota exceeded",
|
||||
"quota_exceeded",
|
||||
"billing",
|
||||
"payment required",
|
||||
"out of credits",
|
||||
"no credits",
|
||||
"credit limit",
|
||||
"spending limit",
|
||||
"saldo insuficiente",
|
||||
"limite de gastos",
|
||||
"cota excedida",
|
||||
],
|
||||
skipForToolRequests: true,
|
||||
maxOutputTokens: 4096,
|
||||
};
|
||||
|
||||
export interface FallbackDecision {
|
||||
shouldFallback: true;
|
||||
reason: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
maxOutputTokens: number;
|
||||
}
|
||||
|
||||
export interface NoFallbackDecision {
|
||||
shouldFallback: false;
|
||||
reason: string;
|
||||
}
|
||||
|
||||
export type FallbackResult = FallbackDecision | NoFallbackDecision;
|
||||
|
||||
/**
 * Decide whether a failed request should be redirected to the emergency
 * fallback model.
 *
 * Checks, in order:
 *   1. fallback disabled → no fallback,
 *   2. request has tools and `skipForToolRequests` is set → no fallback,
 *   3. HTTP 402 with `triggerOn402` → fallback,
 *   4. error body contains a budget keyword (case-insensitive) with
 *      `triggerOnBudgetKeywords` → fallback,
 *   5. otherwise → no fallback.
 *
 * @param status - HTTP status of the failed upstream response.
 * @param errorBody - Error body text of the failed response.
 * @param requestHasTools - Whether the original request carried tools.
 * @param config - Fallback configuration; defaults to EMERGENCY_FALLBACK_CONFIG.
 * @returns A decision carrying the reason and, on fallback, the target model.
 */
export function shouldUseFallback(
  status: number,
  errorBody: string,
  requestHasTools: boolean,
  config: EmergencyFallbackConfig = EMERGENCY_FALLBACK_CONFIG
): FallbackResult {
  const decline = (reason: string): FallbackResult => ({ shouldFallback: false, reason });
  const accept = (reason: string): FallbackResult => ({
    shouldFallback: true,
    reason,
    provider: config.provider,
    model: config.model,
    maxOutputTokens: config.maxOutputTokens,
  });

  if (!config.enabled) return decline("emergency fallback disabled");
  if (config.skipForToolRequests && requestHasTools) return decline("skipped: request has tools");

  if (config.triggerOn402 && status === 402) {
    return accept(`HTTP 402 → emergency fallback to ${config.provider}/${config.model}`);
  }

  if (config.triggerOnBudgetKeywords && errorBody) {
    const haystack = errorBody.toLowerCase();
    for (const keyword of config.budgetKeywords) {
      if (haystack.includes(keyword.toLowerCase())) {
        return accept(
          `Budget error detected ('${keyword}') → emergency fallback to ${config.provider}/${config.model}`
        );
      }
    }
  }

  return decline("no budget error detected");
}
|
||||
|
||||
/**
 * Type guard: true when the result is a positive fallback decision, i.e. its
 * `shouldFallback` flag is exactly `true`.
 *
 * @param result - Outcome returned by `shouldUseFallback`.
 * @returns Whether `result` carries a fallback target.
 */
export function isFallbackDecision(result: FallbackResult): result is FallbackDecision {
  const { shouldFallback } = result;
  return shouldFallback === true;
}
|
||||
@@ -0,0 +1,375 @@
|
||||
/**
|
||||
* Multilingual Intent Detection for AutoCombo
|
||||
*
|
||||
* Classifies prompts as: code | reasoning | simple | medium
|
||||
* using keywords in 9 languages (EN, PT-BR, ES, ZH, JA, RU, DE, KO, AR).
|
||||
*
|
||||
* Inspired by ClawRouter (BlockRunAI) multilingual routing system.
|
||||
* Execution: purely synchronous, <1ms, no I/O.
|
||||
*/
|
||||
|
||||
export type IntentType = "code" | "reasoning" | "simple" | "medium";
|
||||
|
||||
export const CODE_KEYWORDS: readonly string[] = [
|
||||
// English
|
||||
"function",
|
||||
"class",
|
||||
"import",
|
||||
"def",
|
||||
"SELECT",
|
||||
"async",
|
||||
"await",
|
||||
"const",
|
||||
"let",
|
||||
"var",
|
||||
"return",
|
||||
"```",
|
||||
"algorithm",
|
||||
"compile",
|
||||
"debug",
|
||||
"refactor",
|
||||
"typescript",
|
||||
"python",
|
||||
"javascript",
|
||||
"code",
|
||||
"implement",
|
||||
"write a",
|
||||
"create a component",
|
||||
"endpoint",
|
||||
"repository",
|
||||
"deploy",
|
||||
"install",
|
||||
"script",
|
||||
"api",
|
||||
"database",
|
||||
"query",
|
||||
"schema",
|
||||
"interface",
|
||||
"generic",
|
||||
"enum",
|
||||
"module",
|
||||
"package",
|
||||
"dependency",
|
||||
// Português (PT-BR)
|
||||
"função",
|
||||
"classe",
|
||||
"importar",
|
||||
"definir",
|
||||
"consulta",
|
||||
"assíncrono",
|
||||
"aguardar",
|
||||
"constante",
|
||||
"variável",
|
||||
"retornar",
|
||||
"algoritmo",
|
||||
"compilar",
|
||||
"depurar",
|
||||
"refatorar",
|
||||
"código",
|
||||
"implementar",
|
||||
"criar um",
|
||||
"componente",
|
||||
"como fazer",
|
||||
"repositório",
|
||||
"configurar",
|
||||
"instalar",
|
||||
"banco de dados",
|
||||
"escrever uma função",
|
||||
"criar uma classe",
|
||||
// Español
|
||||
"función",
|
||||
"clase",
|
||||
"importar",
|
||||
"definir",
|
||||
"consulta",
|
||||
"asíncrono",
|
||||
"esperar",
|
||||
"constante",
|
||||
"variable",
|
||||
"retornar",
|
||||
"algoritmo",
|
||||
"compilar",
|
||||
"depurar",
|
||||
"refactorizar",
|
||||
"código",
|
||||
"implementar",
|
||||
// 中文
|
||||
"函数",
|
||||
"类",
|
||||
"导入",
|
||||
"定义",
|
||||
"查询",
|
||||
"异步",
|
||||
"等待",
|
||||
"常量",
|
||||
"变量",
|
||||
"返回",
|
||||
"算法",
|
||||
"编译",
|
||||
"调试",
|
||||
"代码",
|
||||
// 日本語
|
||||
"関数",
|
||||
"クラス",
|
||||
"インポート",
|
||||
"非同期",
|
||||
"定数",
|
||||
"変数",
|
||||
"コード",
|
||||
"アルゴリズム",
|
||||
// Русский
|
||||
"функция",
|
||||
"класс",
|
||||
"импорт",
|
||||
"запрос",
|
||||
"асинхронный",
|
||||
"константа",
|
||||
"переменная",
|
||||
"алгоритм",
|
||||
"код",
|
||||
// Deutsch
|
||||
"funktion",
|
||||
"klasse",
|
||||
"importieren",
|
||||
"abfrage",
|
||||
"asynchron",
|
||||
"konstante",
|
||||
"variable",
|
||||
"algorithmus",
|
||||
"code",
|
||||
// 한국어
|
||||
"함수",
|
||||
"클래스",
|
||||
"가져오기",
|
||||
"정의",
|
||||
"쿼리",
|
||||
"비동기",
|
||||
"대기",
|
||||
"상수",
|
||||
"변수",
|
||||
"반환",
|
||||
"코드",
|
||||
// العربية
|
||||
"دالة",
|
||||
"فئة",
|
||||
"استيراد",
|
||||
"استعلام",
|
||||
"غير متزامن",
|
||||
"ثابت",
|
||||
"متغير",
|
||||
"كود",
|
||||
"خوارزمية",
|
||||
];
|
||||
|
||||
export const REASONING_KEYWORDS: readonly string[] = [
|
||||
// English
|
||||
"prove",
|
||||
"theorem",
|
||||
"derive",
|
||||
"step by step",
|
||||
"chain of thought",
|
||||
"formally",
|
||||
"mathematical",
|
||||
"proof",
|
||||
"logically",
|
||||
"analyze",
|
||||
"reasoning",
|
||||
"deduce",
|
||||
"infer",
|
||||
"hypothesis",
|
||||
"convergence",
|
||||
// Português (PT-BR)
|
||||
"provar",
|
||||
"teorema",
|
||||
"derivar",
|
||||
"passo a passo",
|
||||
"cadeia de pensamento",
|
||||
"formalmente",
|
||||
"matemático",
|
||||
"prova",
|
||||
"logicamente",
|
||||
"analisar",
|
||||
"raciocínio",
|
||||
"deduzir",
|
||||
"inferir",
|
||||
"hipótese",
|
||||
"demonstrar",
|
||||
"cálculo",
|
||||
"equação diferencial",
|
||||
"integral",
|
||||
"otimização",
|
||||
// Español
|
||||
"demostrar",
|
||||
"teorema",
|
||||
"derivar",
|
||||
"paso a paso",
|
||||
"formalmente",
|
||||
"matemático",
|
||||
"lógicamente",
|
||||
// 中文
|
||||
"证明",
|
||||
"定理",
|
||||
"推导",
|
||||
"逐步",
|
||||
"思维链",
|
||||
"数学",
|
||||
"逻辑",
|
||||
"分析",
|
||||
// 日本語
|
||||
"証明",
|
||||
"定理",
|
||||
"導出",
|
||||
"論理的",
|
||||
"分析",
|
||||
// Русский
|
||||
"доказать",
|
||||
"теорема",
|
||||
"шаг за шагом",
|
||||
"математически",
|
||||
"логически",
|
||||
// Deutsch
|
||||
"beweisen",
|
||||
"theorem",
|
||||
"schritt für schritt",
|
||||
"mathematisch",
|
||||
"logisch",
|
||||
// 한국어
|
||||
"증명",
|
||||
"정리",
|
||||
"단계별",
|
||||
"수학적",
|
||||
"논리적",
|
||||
// العربية
|
||||
"إثبات",
|
||||
"نظرية",
|
||||
"خطوة بخطوة",
|
||||
"رياضي",
|
||||
"منطقياً",
|
||||
];
|
||||
|
||||
export const SIMPLE_KEYWORDS: readonly string[] = [
|
||||
// English
|
||||
"what is",
|
||||
"define",
|
||||
"translate",
|
||||
"hello",
|
||||
"yes or no",
|
||||
"summarize",
|
||||
"list",
|
||||
"tell me",
|
||||
"who is",
|
||||
// Português (PT-BR)
|
||||
"o que é",
|
||||
"definir",
|
||||
"traduzir",
|
||||
"olá",
|
||||
"oi",
|
||||
"sim ou não",
|
||||
"resumir",
|
||||
"listar",
|
||||
"me diga",
|
||||
"quem é",
|
||||
"quando foi",
|
||||
"onde fica",
|
||||
"explique brevemente",
|
||||
"de forma simples",
|
||||
// Español
|
||||
"qué es",
|
||||
"definir",
|
||||
"traducir",
|
||||
"hola",
|
||||
"resumir",
|
||||
"listar",
|
||||
// 中文
|
||||
"什么是",
|
||||
"定义",
|
||||
"翻译",
|
||||
"你好",
|
||||
"总结",
|
||||
"列出",
|
||||
// Русский
|
||||
"что такое",
|
||||
"определить",
|
||||
"перевести",
|
||||
"привет",
|
||||
"резюмировать",
|
||||
// Deutsch
|
||||
"was ist",
|
||||
"definieren",
|
||||
"übersetzen",
|
||||
"hallo",
|
||||
"zusammenfassen",
|
||||
// 한국어
|
||||
"이란",
|
||||
"정의",
|
||||
"번역",
|
||||
"안녕",
|
||||
"요약",
|
||||
// العربية
|
||||
"ما هو",
|
||||
"تعريف",
|
||||
"ترجمة",
|
||||
"مرحبا",
|
||||
"ملخص",
|
||||
];
|
||||
|
||||
/**
 * Classify a prompt's intent with the built-in multilingual keyword lists.
 *
 * The optional system prompt and the prompt are joined and lower-cased, then
 * scanned for keywords as plain substrings. Categories are tried in priority
 * order: code, then reasoning, then simple; anything else is "medium".
 * "simple" is only considered when `prompt` alone has fewer than 60
 * whitespace-separated words.
 *
 * NOTE(review): matching is substring-based, so short keywords can match inside
 * longer, unrelated words — confirm that this is acceptable for routing.
 *
 * @param prompt - User prompt text.
 * @param systemPrompt - Optional system prompt; scanned for keywords but not counted as words.
 * @returns The first matching intent, or "medium" when no keyword matches.
 */
export function classifyPromptIntent(prompt: string, systemPrompt?: string): IntentType {
  const haystack = `${systemPrompt ?? ""} ${prompt}`.toLowerCase();
  const promptWords = prompt.trim().split(/\s+/).length;

  const containsAny = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => haystack.includes(keyword.toLowerCase()));

  if (containsAny(CODE_KEYWORDS)) return "code";
  if (containsAny(REASONING_KEYWORDS)) return "reasoning";
  if (promptWords < 60 && containsAny(SIMPLE_KEYWORDS)) return "simple";
  return "medium";
}
|
||||
|
||||
export interface IntentClassifierConfig {
|
||||
enabled: boolean;
|
||||
extraCodeKeywords?: string[];
|
||||
extraReasoningKeywords?: string[];
|
||||
extraSimpleKeywords?: string[];
|
||||
simpleMaxWords?: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_INTENT_CONFIG: IntentClassifierConfig = {
|
||||
enabled: true,
|
||||
simpleMaxWords: 60,
|
||||
};
|
||||
|
||||
/**
 * Classify a prompt's intent using the built-in keyword lists plus any extra
 * keywords supplied in `config`.
 *
 * Priority is code > reasoning > simple > medium (default). Keywords are matched
 * as lower-cased substrings of `systemPrompt + " " + prompt`. The "simple"
 * category is only considered when `prompt` alone has fewer than
 * `config.simpleMaxWords` (default 60) whitespace-separated words.
 *
 * @param prompt - User prompt text.
 * @param config - Classifier settings; when `enabled` is false the result is always "medium".
 * @param systemPrompt - Optional system prompt; scanned for keywords but not counted as words.
 * @returns The first matching intent, or "medium".
 */
export function classifyWithConfig(
  prompt: string,
  config: IntentClassifierConfig,
  systemPrompt?: string
): IntentType {
  if (!config.enabled) return "medium";

  const fullText = `${systemPrompt ?? ""} ${prompt}`.toLowerCase();
  const wordCount = prompt.trim().split(/\s+/).length;
  const maxSimpleWords = config.simpleMaxWords ?? 60;

  // Extra keywords come from configuration. A blank entry would match every
  // prompt (`"abc".includes("")` is true) and a non-string entry would throw on
  // `.toLowerCase()`, so both are ignored.
  const usableExtras = (extra?: string[]): string[] =>
    (extra ?? []).filter((kw) => typeof kw === "string" && kw.trim() !== "");

  const matchesAny = (builtIn: readonly string[], extra?: string[]): boolean =>
    [...builtIn, ...usableExtras(extra)].some((kw) => fullText.includes(kw.toLowerCase()));

  if (matchesAny(CODE_KEYWORDS, config.extraCodeKeywords)) return "code";
  if (matchesAny(REASONING_KEYWORDS, config.extraReasoningKeywords)) return "reasoning";
  if (wordCount < maxSimpleWords && matchesAny(SIMPLE_KEYWORDS, config.extraSimpleKeywords)) {
    return "simple";
  }
  return "medium";
}
|
||||
@@ -23,6 +23,18 @@ const PROVIDER_MODEL_ALIASES = {
|
||||
"gemini-3-flash": "gemini-3-flash-preview",
|
||||
"raptor-mini": "oswe-vscode-prime",
|
||||
},
|
||||
gemini: {
|
||||
"gemini-3.1-pro-preview": "gemini-3.1-pro",
|
||||
"gemini-3-1-pro": "gemini-3.1-pro",
|
||||
},
|
||||
"gemini-cli": {
|
||||
"gemini-3.1-pro-preview": "gemini-3.1-pro",
|
||||
"gemini-3-1-pro": "gemini-3.1-pro",
|
||||
},
|
||||
nvidia: {
|
||||
"gpt-oss-120b": "openai/gpt-oss-120b",
|
||||
"nvidia/gpt-oss-120b": "openai/gpt-oss-120b",
|
||||
},
|
||||
antigravity: {},
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
import { PROVIDER_ID_TO_ALIAS, PROVIDER_MODELS } from "../config/providerModels.ts";
|
||||
import { parseModel } from "./model.ts";
|
||||
|
||||
// Conservative denylist fallback used when registry metadata is absent.
|
||||
// Keep small and explicit to avoid false negatives.
|
||||
const TOOL_CALLING_UNSUPPORTED_PATTERNS = [
|
||||
"gpt-oss-120b",
|
||||
"deepseek-reasoner",
|
||||
"glm-4.7",
|
||||
"glm4.7",
|
||||
];
|
||||
|
||||
/**
 * Read the `toolCalling` flag for a model from the provider registry.
 *
 * @param providerIdOrAlias - Provider id or alias; ids are mapped through PROVIDER_ID_TO_ALIAS first.
 * @param modelId - Model id, compared by strict equality against each registry entry's `id`.
 * @returns The registry's boolean flag, or null when the provider has no model
 *   array, the model is not listed, or the entry carries no boolean `toolCalling`.
 */
function getRegistryToolCallingFlag(providerIdOrAlias: string, modelId: string): boolean | null {
  const registryKey = PROVIDER_ID_TO_ALIAS[providerIdOrAlias] || providerIdOrAlias;
  const providerModels = PROVIDER_MODELS[registryKey];
  if (!Array.isArray(providerModels)) return null;

  const entry = providerModels.find((candidate) => candidate?.id === modelId);
  if (entry && typeof entry.toolCalling === "boolean") {
    return entry.toolCalling;
  }
  return null;
}
|
||||
|
||||
/**
 * Decide whether a model can be trusted with structured function/tool calling.
 *
 * Resolution order:
 *   1. The provider registry's `toolCalling` flag, when the parsed provider and
 *      model resolve to an entry that carries one.
 *   2. The denylist fallback: any model string containing one of
 *      TOOL_CALLING_UNSUPPORTED_PATTERNS (case-insensitive) is unsupported.
 *   3. Otherwise supported. An empty model string is reported as unsupported.
 *
 * @param modelStr - Model string as received, passed to `parseModel`.
 * @returns true when tool calling should be attempted for this model.
 */
export function supportsToolCalling(modelStr: string): boolean {
  const parsed = parseModel(modelStr);
  const providerKey = parsed.provider || parsed.providerAlias || "";

  if (providerKey) {
    const registryFlag = getRegistryToolCallingFlag(providerKey, parsed.model || modelStr);
    if (registryFlag !== null) return registryFlag;
  }

  const lowered = String(modelStr || "").toLowerCase();
  if (lowered === "") return false;

  // An exact match or a "/pattern" suffix is a special case of containment,
  // so a single substring test covers every denylist rule.
  const isDenied = TOOL_CALLING_UNSUPPORTED_PATTERNS.some((pattern) => lowered.includes(pattern));
  return !isDenied;
}
|
||||
@@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Request Deduplication Service
|
||||
*
|
||||
* Deduplicates **concurrent** identical requests to the same upstream.
|
||||
* Inspired by ClawRouter's dedup.ts (BlockRunAI / github.com/BlockRunAI/ClawRouter).
|
||||
*
|
||||
* IMPORTANT: In-memory only — does NOT persist across restarts and does NOT
|
||||
* work across multiple process instances (no cross-instance dedup).
|
||||
*/
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
|
||||
export interface DedupConfig {
|
||||
enabled: boolean;
|
||||
maxTemperatureForDedup: number;
|
||||
timeoutMs: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_DEDUP_CONFIG: DedupConfig = {
|
||||
enabled: true,
|
||||
maxTemperatureForDedup: 0.1,
|
||||
timeoutMs: 60_000,
|
||||
};
|
||||
|
||||
export interface DedupResult<T> {
|
||||
result: T;
|
||||
wasDeduplicated: boolean;
|
||||
hash: string;
|
||||
}
|
||||
|
||||
const inflight = new Map<string, Promise<unknown>>();
|
||||
|
||||
/**
 * Compute a deterministic hash for a request body.
 *
 * Includes every field that can change the upstream response: model, messages,
 * temperature, tools, tool_choice, max_tokens, max_completion_tokens,
 * response_format, top_p, frequency_penalty, presence_penalty, n, stop, seed,
 * logit_bias.
 * Excludes: stream, user, metadata (they don't affect LLM output).
 *
 * A missing or non-numeric temperature is hashed as 1.0; every other missing
 * field is hashed as null. A null or non-object body is hashed as an empty request.
 *
 * @param requestBody - Parsed request body (untrusted shape).
 * @returns The first 16 hex characters of the SHA-256 of the canonical JSON.
 */
export function computeRequestHash(requestBody: unknown): string {
  const body = (
    requestBody !== null && typeof requestBody === "object" ? requestBody : {}
  ) as Record<string, unknown>;
  const canonical = {
    model: body.model ?? null,
    messages: body.messages ?? null,
    temperature: typeof body.temperature === "number" ? body.temperature : 1.0,
    tools: body.tools ?? null,
    tool_choice: body.tool_choice ?? null,
    max_tokens: body.max_tokens ?? null,
    response_format: body.response_format ?? null,
    top_p: body.top_p ?? null,
    frequency_penalty: body.frequency_penalty ?? null,
    presence_penalty: body.presence_penalty ?? null,
    // These also shape the completion; leaving them out would let two different
    // requests share one upstream response.
    max_completion_tokens: body.max_completion_tokens ?? null,
    n: body.n ?? null,
    stop: body.stop ?? null,
    seed: body.seed ?? null,
    logit_bias: body.logit_bias ?? null,
  };
  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex").slice(0, 16);
}
|
||||
|
||||
/**
 * Determine whether a request should be deduplicated.
 *
 * A request qualifies only when deduplication is enabled, the body is an
 * object, it is not a streaming request, and its temperature does not exceed
 * `config.maxTemperatureForDedup`. A missing, non-numeric or non-finite
 * temperature is treated as 1.0.
 *
 * @param requestBody - Parsed request body (untrusted shape).
 * @param config - Deduplication settings; defaults to DEFAULT_DEDUP_CONFIG.
 * @returns true when concurrent identical copies of this request may share one upstream call.
 */
export function shouldDeduplicate(
  requestBody: unknown,
  config: DedupConfig = DEFAULT_DEDUP_CONFIG
): boolean {
  if (!config.enabled) return false;
  // A body that is not an object cannot be inspected; never deduplicate it.
  if (requestBody === null || typeof requestBody !== "object") return false;

  const body = requestBody as Record<string, unknown>;
  if (body.stream === true) return false;

  // NaN/Infinity pass a plain `typeof === "number"` check, and NaN compares
  // false against any threshold, so require a finite number.
  const temperature =
    typeof body.temperature === "number" && Number.isFinite(body.temperature)
      ? body.temperature
      : 1.0;
  return temperature <= config.maxTemperatureForDedup;
}
|
||||
|
||||
/**
 * Execute a request with deduplication.
 * Concurrent identical requests share one upstream call.
 *
 * The first caller for a hash (the "leader") runs `fn` and publishes its outcome
 * through a shared promise stored in the in-flight map; callers arriving while
 * that entry exists (the "followers") await the shared promise instead of
 * calling `fn`. The entry is removed when `fn` settles, or after
 * `config.timeoutMs`, whichever comes first.
 *
 * @param hash - Request hash, e.g. from computeRequestHash().
 * @param fn - Performs the upstream call.
 * @param config - Deduplication settings; when disabled, `fn` is simply awaited.
 * @returns The result, whether it was served from another caller's request, and the hash.
 * @throws Whatever `fn` rejects with — for the leader and for every follower.
 */
export async function deduplicate<T>(
  hash: string,
  fn: () => Promise<T>,
  config: DedupConfig = DEFAULT_DEDUP_CONFIG
): Promise<DedupResult<T>> {
  if (!config.enabled) {
    return { result: await fn(), wasDeduplicated: false, hash };
  }

  const existing = inflight.get(hash);
  if (existing) {
    const result = (await existing) as T;
    return { result, wasDeduplicated: true, hash };
  }

  let resolve!: (value: T) => void;
  let reject!: (reason: unknown) => void;
  const sharedPromise = new Promise<T>((res, rej) => {
    resolve = res;
    reject = rej;
  });
  // The leader reports failure through its own `throw` below. When no follower
  // is awaiting the shared promise, rejecting it would otherwise surface as an
  // unhandled rejection, so mark it as handled. Followers that do await it
  // still receive the rejection.
  sharedPromise.catch(() => {});
  inflight.set(hash, sharedPromise as Promise<unknown>);

  // Safety valve: stop coalescing onto a call that has been running too long.
  const timer = setTimeout(() => {
    if (inflight.get(hash) === sharedPromise) inflight.delete(hash);
  }, config.timeoutMs);

  try {
    const result = await fn();
    resolve(result);
    return { result, wasDeduplicated: false, hash };
  } catch (err) {
    reject(err);
    throw err;
  } finally {
    clearTimeout(timer);
    // Only remove our own entry; after a timeout a newer leader may own the hash.
    if (inflight.get(hash) === sharedPromise) inflight.delete(hash);
  }
}
|
||||
|
||||
/** Number of request hashes that currently have an upstream call in flight. */
export function getInflightCount(): number {
  const { size } = inflight;
  return size;
}
|
||||
/** Snapshot of the request hashes that currently have an upstream call in flight. */
export function getInflightHashes(): string[] {
  return Array.from(inflight.keys());
}
|
||||
/**
 * Forget every in-flight entry. Calls already awaiting a shared promise are
 * not affected; later callers simply start a fresh upstream call.
 */
export function clearInflight(): void {
  inflight.clear();
}
|
||||
@@ -0,0 +1,142 @@
|
||||
/**
|
||||
* Search Cache — in-memory TTL cache with request coalescing
|
||||
*
|
||||
* Bounded at MAX_CACHE_ENTRIES to prevent OOM.
|
||||
* Request coalescing deduplicates concurrent identical queries
|
||||
* to prevent cache stampede (critical for agentic tools).
|
||||
*/
|
||||
|
||||
import { createHash } from "crypto";
|
||||
|
||||
const MAX_CACHE_ENTRIES = 5000;
|
||||
// Default TTL, overridable through SEARCH_CACHE_TTL_MS. Falls back to 5 minutes
// when the variable is unset, empty, or not parseable as an integer; a NaN TTL
// would otherwise fail the `ttlMs > 0` check and silently disable caching.
const DEFAULT_TTL_MS = (() => {
  const fallbackMs = 5 * 60 * 1000;
  const parsed = Number.parseInt(process.env.SEARCH_CACHE_TTL_MS || "", 10);
  return Number.isNaN(parsed) ? fallbackMs : parsed;
})();
|
||||
|
||||
interface CacheEntry<T> {
|
||||
data: T;
|
||||
expiresAt: number;
|
||||
}
|
||||
|
||||
const cache = new Map<string, CacheEntry<unknown>>();
|
||||
const inflight = new Map<string, Promise<unknown>>();
|
||||
|
||||
let hits = 0;
|
||||
let misses = 0;
|
||||
|
||||
/**
 * Canonicalize a query string before it is used in a cache key:
 * Unicode NFKC normalization, lower-casing, trimming, and collapsing every run
 * of whitespace into a single space.
 */
function normalizeQuery(query: string): string {
  const folded = query.normalize("NFKC").toLowerCase();
  const trimmed = folded.trim();
  return trimmed.replace(/\s+/g, " ");
}
|
||||
|
||||
/**
 * Build a deterministic cache key from the search parameters.
 *
 * The query is normalized first, so queries that differ only in case, Unicode
 * compatibility form or whitespace share a key. Falsy `country`, `language` and
 * `filters` are all recorded as null.
 *
 * @returns Hex-encoded SHA-256 of the JSON-serialized parameters.
 */
export function computeCacheKey(
  query: string,
  provider: string,
  searchType: string,
  maxResults: number,
  country?: string,
  language?: string,
  filters?: unknown
): string {
  const keyFields = {
    q: normalizeQuery(query),
    p: provider,
    t: searchType,
    n: maxResults,
    c: country || null,
    l: language || null,
    f: filters || null,
  };
  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(keyFields));
  return hasher.digest("hex");
}
|
||||
|
||||
/**
 * Make room in the cache before a write.
 *
 * First drops every entry whose expiry time has passed (a full pass over the
 * cache on each call), then removes entries in insertion order until the
 * cache holds fewer than MAX_CACHE_ENTRIES.
 */
function evictIfNeeded(): void {
  const cutoff = Date.now();

  // Pass 1: purge expired entries.
  cache.forEach((entry, key) => {
    if (entry.expiresAt <= cutoff) cache.delete(key);
  });

  // Pass 2: FIFO eviction — a Map iterates in insertion order, so keys come
  // out oldest first.
  for (const oldestKey of cache.keys()) {
    if (cache.size < MAX_CACHE_ENTRIES) break;
    cache.delete(oldestKey);
  }
}
|
||||
|
||||
/**
 * Serve a search from cache, piggy-back on an identical request that is already
 * running, or run the fetch and remember its result.
 *
 * Joining an in-flight request is counted as a hit and reported as `cached: true`.
 * NOTE(review): `ttlMs <= 0` only skips storing the result; an unexpired entry
 * already in the cache is still returned. Confirm this is the intended "bypass".
 *
 * @param key - Cache key from computeCacheKey()
 * @param ttlMs - Lifetime of the stored entry in milliseconds (0 to skip storing)
 * @param fetchFn - Executed on a miss when no identical request is in flight
 * @returns The data plus whether it came from the cache or an in-flight request
 */
export async function getOrCoalesce<T>(
  key: string,
  ttlMs: number,
  fetchFn: () => Promise<T>
): Promise<{ data: T; cached: boolean }> {
  // Fresh cache entry?
  const entry = cache.get(key) as CacheEntry<T> | undefined;
  if (entry !== undefined && entry.expiresAt > Date.now()) {
    hits += 1;
    return { data: entry.data, cached: true };
  }

  // Identical request already running? Wait for it instead of fetching again.
  const pending = inflight.get(key) as Promise<T> | undefined;
  if (pending) {
    hits += 1;
    return { data: await pending, cached: true };
  }

  // Miss: this caller performs the fetch and publishes it for others to join.
  misses += 1;
  const request = fetchFn();
  inflight.set(key, request);

  try {
    const data = await request;
    if (ttlMs > 0) {
      evictIfNeeded();
      cache.set(key, { data, expiresAt: Date.now() + ttlMs });
    }
    return { data, cached: false };
  } finally {
    // Success or failure, the request is no longer in flight.
    inflight.delete(key);
  }
}
|
||||
|
||||
/**
 * Report the current entry count and the running hit/miss counters, for monitoring.
 */
export function getCacheStats(): { size: number; hits: number; misses: number } {
  const size = cache.size;
  return { size, hits, misses };
}
|
||||
|
||||
/**
|
||||
* Default TTL for search cache entries.
|
||||
*/
|
||||
export const SEARCH_CACHE_DEFAULT_TTL_MS = DEFAULT_TTL_MS;
|
||||
@@ -91,6 +91,10 @@ export function filterToOpenAIFormat(body) {
|
||||
delete body.tools;
|
||||
}
|
||||
|
||||
// Strip Claude-specific fields that OpenAI-compatible providers reject
|
||||
delete body.metadata;
|
||||
delete body.anthropic_version;
|
||||
|
||||
// Normalize tools to OpenAI format (from Claude, Gemini, etc.)
|
||||
if (body.tools && Array.isArray(body.tools) && body.tools.length > 0) {
|
||||
body.tools = body.tools
|
||||
|
||||
@@ -131,7 +131,7 @@ export function translateRequest(
|
||||
}
|
||||
|
||||
// Final step: prepare request for Claude format endpoints
|
||||
if (targetFormat === FORMATS.CLAUDE) {
|
||||
if (targetFormat === FORMATS.CLAUDE && sourceFormat !== FORMATS.CLAUDE) {
|
||||
result = prepareClaudeRequest(result, provider);
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
import { register } from "../registry.ts";
|
||||
import { FORMATS } from "../formats.ts";
|
||||
import { generateToolCallId } from "../helpers/toolCallHelper.ts";
|
||||
|
||||
type JsonRecord = Record<string, unknown>;
|
||||
|
||||
@@ -120,6 +121,12 @@ export function openaiResponsesToOpenAIRequest(
|
||||
}
|
||||
|
||||
if (itemType === "function_call") {
|
||||
// Skip tool calls with empty names to avoid infinite placeholder_tool loops
|
||||
const fnName = toString(item.name).trim();
|
||||
if (!fnName) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Start or append assistant message with tool_calls
|
||||
if (!currentAssistantMsg) {
|
||||
currentAssistantMsg = {
|
||||
@@ -136,7 +143,7 @@ export function openaiResponsesToOpenAIRequest(
|
||||
id: toString(item.call_id),
|
||||
type: "function",
|
||||
function: {
|
||||
name: toString(item.name),
|
||||
name: fnName,
|
||||
arguments: item.arguments,
|
||||
},
|
||||
});
|
||||
@@ -201,6 +208,24 @@ export function openaiResponsesToOpenAIRequest(
|
||||
});
|
||||
}
|
||||
|
||||
// Filter orphaned tool results (no matching tool_call in assistant messages)
|
||||
const allToolCallIds = new Set<string>();
|
||||
for (const m of messages) {
|
||||
const rec = toRecord(m);
|
||||
if (Array.isArray(rec.tool_calls)) {
|
||||
for (const tc of rec.tool_calls as { id?: string }[]) {
|
||||
if (tc.id) allToolCallIds.add(String(tc.id));
|
||||
}
|
||||
}
|
||||
}
|
||||
result.messages = messages.filter((m) => {
|
||||
const rec = toRecord(m);
|
||||
if (rec.role === "tool" && rec.tool_call_id) {
|
||||
return allToolCallIds.has(String(rec.tool_call_id));
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
// Cleanup Responses API specific fields
|
||||
delete result.input;
|
||||
delete result.instructions;
|
||||
@@ -319,10 +344,15 @@ export function openaiToOpenAIResponsesRequest(
|
||||
for (const toolCallValue of msg.tool_calls) {
|
||||
const toolCall = toRecord(toolCallValue);
|
||||
const fn = toRecord(toolCall.function);
|
||||
// Skip tool calls with empty names to avoid infinite placeholder_tool loops
|
||||
const fnName = toString(fn.name).trim();
|
||||
if (!fnName) {
|
||||
continue;
|
||||
}
|
||||
input.push({
|
||||
type: "function_call",
|
||||
call_id: toString(toolCall.id),
|
||||
name: toString(fn.name),
|
||||
call_id: toString(toolCall.id).trim() || generateToolCallId(),
|
||||
name: fnName,
|
||||
arguments: toString(fn.arguments, "{}"),
|
||||
});
|
||||
}
|
||||
@@ -339,6 +369,22 @@ export function openaiToOpenAIResponsesRequest(
|
||||
}
|
||||
}
|
||||
|
||||
// Filter orphaned function_call_output items (no matching function_call)
|
||||
// This happens when Claude Code compaction removes messages but leaves tool results
|
||||
const knownCallIds = new Set(
|
||||
input
|
||||
.filter(
|
||||
(item: { type?: string; call_id?: string }) => item.type === "function_call" && item.call_id
|
||||
)
|
||||
.map((item: { type?: string; call_id?: string }) => item.call_id)
|
||||
);
|
||||
result.input = input.filter((item: { type?: string; call_id?: string }) => {
|
||||
if (item.type === "function_call_output" && item.call_id) {
|
||||
return knownCallIds.has(item.call_id);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
// If no system message, keep empty instructions
|
||||
if (!hasSystemMessage) {
|
||||
result.instructions = "";
|
||||
|
||||
@@ -123,6 +123,43 @@ export function openaiToClaudeRequest(model, body, stream) {
|
||||
|
||||
flushCurrentMessage();
|
||||
|
||||
// Remove assistant messages with empty content (can happen when all tool_use blocks were skipped)
|
||||
result.messages = result.messages.filter((msg) => {
|
||||
if (msg.role === "assistant" && Array.isArray(msg.content) && msg.content.length === 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
// Filter orphaned tool_result blocks whose tool_use_id has no matching tool_use
|
||||
const allToolUseIds = new Set<string>();
|
||||
for (const msg of result.messages) {
|
||||
if (msg.role === "assistant" && Array.isArray(msg.content)) {
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "tool_use" && block.id) {
|
||||
allToolUseIds.add(String(block.id));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const msg of result.messages) {
|
||||
if (msg.role === "user" && Array.isArray(msg.content)) {
|
||||
msg.content = msg.content.filter((block) => {
|
||||
if (block.type === "tool_result" && block.tool_use_id) {
|
||||
return allToolUseIds.has(String(block.tool_use_id));
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
// Remove user messages that became empty after orphan filtering
|
||||
result.messages = result.messages.filter((msg) => {
|
||||
if (msg.role === "user" && Array.isArray(msg.content) && msg.content.length === 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
// Add cache_control to last assistant message
|
||||
for (let i = result.messages.length - 1; i >= 0; i--) {
|
||||
const message = result.messages[i];
|
||||
|
||||
@@ -184,6 +184,17 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
typeof parsed.type === "string" &&
|
||||
parsed.type.startsWith("response.");
|
||||
|
||||
// Detect Claude SSE payloads. Includes "ping" and "error" to ensure
|
||||
// they bypass the Chat Completions sanitization path which would
|
||||
// incorrectly process or drop them.
|
||||
const isClaudeSSE =
|
||||
parsed.type &&
|
||||
typeof parsed.type === "string" &&
|
||||
(parsed.type.startsWith("message") ||
|
||||
parsed.type.startsWith("content_block") ||
|
||||
parsed.type === "ping" ||
|
||||
parsed.type === "error");
|
||||
|
||||
if (isResponsesSSE) {
|
||||
// Responses SSE: only extract usage, forward payload as-is
|
||||
const extracted = extractUsage(parsed);
|
||||
@@ -194,6 +205,22 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
if (parsed.delta && typeof parsed.delta === "string") {
|
||||
totalContentLength += parsed.delta.length;
|
||||
}
|
||||
} else if (isClaudeSSE) {
|
||||
// Claude SSE: extract usage, track content, forward as-is
|
||||
const extracted = extractUsage(parsed);
|
||||
if (extracted) {
|
||||
// Non-destructive merge: never overwrite a positive value with 0
|
||||
// message_start carries input_tokens, message_delta carries output_tokens
|
||||
if (!usage) usage = {};
|
||||
if (extracted.prompt_tokens > 0) usage.prompt_tokens = extracted.prompt_tokens;
|
||||
if (extracted.completion_tokens > 0) usage.completion_tokens = extracted.completion_tokens;
|
||||
if (extracted.total_tokens > 0) usage.total_tokens = extracted.total_tokens;
|
||||
if (extracted.cache_read_input_tokens) usage.cache_read_input_tokens = extracted.cache_read_input_tokens;
|
||||
if (extracted.cache_creation_input_tokens) usage.cache_creation_input_tokens = extracted.cache_creation_input_tokens;
|
||||
}
|
||||
// Track content length from Claude format
|
||||
if (parsed.delta?.text) totalContentLength += parsed.delta.text.length;
|
||||
if (parsed.delta?.thinking) totalContentLength += parsed.delta.thinking.length;
|
||||
} else {
|
||||
// Chat Completions: full sanitization pipeline
|
||||
parsed = sanitizeStreamingChunk(parsed);
|
||||
@@ -372,9 +399,9 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
controller.enqueue(encoder.encode(output));
|
||||
}
|
||||
|
||||
// Estimate usage if provider didn't return valid usage (PASSTHROUGH is always OpenAI format)
|
||||
// Estimate usage if provider didn't return valid usage
|
||||
if (!hasValidUsage(usage) && totalContentLength > 0) {
|
||||
usage = estimateUsage(body, totalContentLength, FORMATS.OPENAI);
|
||||
usage = estimateUsage(body, totalContentLength, sourceFormat || FORMATS.OPENAI);
|
||||
}
|
||||
|
||||
if (hasValidUsage(usage)) {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "omniroute",
|
||||
"version": "2.6.2",
|
||||
"version": "2.7.2",
|
||||
"description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
@@ -90,7 +90,7 @@
|
||||
"express": "^5.2.1",
|
||||
"fetch-socks": "^1.3.2",
|
||||
"http-proxy-middleware": "^3.0.5",
|
||||
"https-proxy-agent": "^7.0.6",
|
||||
"https-proxy-agent": "^8.0.0",
|
||||
"jose": "^6.1.3",
|
||||
"lowdb": "^7.0.1",
|
||||
"monaco-editor": "^0.55.1",
|
||||
|
||||
|
After Width: | Height: | Size: 3.2 KiB |
|
After Width: | Height: | Size: 3.2 KiB |
@@ -0,0 +1 @@
|
||||
<svg width="56" height="64" viewBox="0 0 56 64" fill="none" xmlns="http://www.w3.org/2000/svg"><path fill-rule="evenodd" clip-rule="evenodd" d="M53.292 15.321l1.5-3.676s-1.909-2.043-4.227-4.358c-2.317-2.315-7.225-.953-7.225-.953L37.751 0H18.12l-5.589 6.334s-4.908-1.362-7.225.953C2.988 9.602 1.08 11.645 1.08 11.645l1.5 3.676-1.91 5.447s5.614 21.236 6.272 23.83c1.295 5.106 2.181 7.08 5.862 9.668 3.68 2.587 10.36 7.08 11.45 7.762 1.091.68 2.455 1.84 3.682 1.84 1.227 0 2.59-1.16 3.68-1.84 1.091-.681 7.77-5.175 11.452-7.762 3.68-2.587 4.567-4.562 5.862-9.668.657-2.594 6.27-23.83 6.27-23.83l-1.908-5.447z" fill="url(#paint0_linear)"/><path fill-rule="evenodd" clip-rule="evenodd" d="M34.888 11.508c.818 0 6.885-1.157 6.885-1.157s7.189 8.68 7.189 10.536c0 1.534-.619 2.134-1.347 2.842-.152.148-.31.3-.467.468l-5.39 5.717a9.42 9.42 0 01-.176.18c-.538.54-1.33 1.336-.772 2.658l.115.269c.613 1.432 1.37 3.2.407 4.99-1.025 1.906-2.78 3.178-3.905 2.967-1.124-.21-3.766-1.589-4.737-2.218-.971-.63-4.05-3.166-4.05-4.137 0-.809 2.214-2.155 3.29-2.81.214-.13.383-.232.48-.298.111-.075.297-.19.526-.332.981-.61 2.754-1.71 2.799-2.197.055-.602.034-.778-.758-2.264-.168-.316-.365-.654-.568-1.004-.754-1.295-1.598-2.745-1.41-3.784.21-1.173 2.05-1.845 3.608-2.415.194-.07.385-.14.567-.209l1.623-.609c1.556-.582 3.284-1.229 3.57-1.36.394-.181.292-.355-.903-.468a54.655 54.655 0 01-.58-.06c-1.48-.157-4.209-.446-5.535-.077-.261.073-.553.152-.86.235-1.49.403-3.317.897-3.493 1.182-.03.05-.06.093-.089.133-.168.238-.277.394-.091 1.406.055.302.169.895.31 1.629.41 2.148 1.053 5.498 1.134 6.25.011.106.024.207.036.305.103.84.171 1.399-.805 1.622l-.255.058c-1.102.252-2.717.623-3.3.623-.584 0-2.2-.37-3.302-.623l-.254-.058c-.976-.223-.907-.782-.804-1.622.012-.098.024-.2.035-.305.081-.753.725-4.112 1.137-6.259.14-.73.253-1.32.308-1.62.185-1.012.076-1.168-.092-1.406a3.743 3.743 0 
01-.09-.133c-.174-.285-2-.779-3.491-1.182-.307-.083-.6-.162-.86-.235-1.327-.37-4.055-.08-5.535.077-.226.024-.422.045-.58.06-1.196.113-1.297.287-.903.468.285.131 2.013.778 3.568 1.36.597.223 1.17.437 1.624.609.183.069.373.138.568.21 1.558.57 3.398 1.241 3.608 2.414.187 1.039-.657 2.489-1.41 3.784-.204.35-.4.688-.569 1.004-.791 1.486-.812 1.662-.757 2.264.044.488 1.816 1.587 2.798 2.197.229.142.415.257.526.332.098.066.266.168.48.298 1.076.654 3.29 2 3.29 2.81 0 .97-3.078 3.507-4.05 4.137-.97.63-3.612 2.008-4.737 2.218-1.124.21-2.88-1.061-3.904-2.966-.963-1.791-.207-3.559.406-4.99l.115-.27c.559-1.322-.233-2.118-.772-2.658a9.377 9.377 0 01-.175-.18l-5.39-5.717c-.158-.167-.316-.32-.468-.468-.728-.707-1.346-1.308-1.346-2.842 0-1.855 7.189-10.536 7.189-10.536s6.066 1.157 6.884 1.157c.653 0 1.913-.433 3.227-.885.333-.114.669-.23 1-.34 1.635-.545 2.726-.549 2.726-.549s1.09.004 2.726.549c.33.11.667.226 1 .34 1.313.452 2.574.885 3.226.885zm-1.041 30.706c1.282.66 2.192 1.128 2.536 1.343.445.278.174.803-.232 1.09-.405.285-5.853 4.499-6.381 4.965l-.215.191c-.509.459-1.159 1.044-1.62 1.044-.46 0-1.11-.586-1.62-1.044l-.213-.191c-.53-.466-5.977-4.68-6.382-4.966-.405-.286-.677-.81-.232-1.09.344-.214 1.255-.683 2.539-1.344l1.22-.629c1.92-.992 4.315-1.837 4.689-1.837.373 0 2.767.844 4.689 1.837.436.226.845.437 1.222.63z" fill="#fff"/><path fill-rule="evenodd" clip-rule="evenodd" d="M43.34 6.334L37.751 0H18.12l-5.589 6.334s-4.908-1.362-7.225.953c0 0 6.544-.59 8.793 3.064 0 0 6.066 1.157 6.884 1.157.818 0 2.59-.68 4.226-1.225 1.636-.545 2.727-.549 2.727-.549s1.09.004 2.726.549 3.408 1.225 4.226 1.225c.818 0 6.885-1.157 6.885-1.157 2.249-3.654 8.792-3.064 8.792-3.064-2.317-2.315-7.225-.953-7.225-.953z" fill="url(#paint1_linear)"/><defs><linearGradient id="paint0_linear" x1=".671" y1="64.319" x2="55.2" y2="64.319" gradientUnits="userSpaceOnUse"><stop stop-color="#F50"/><stop offset=".41" stop-color="#F50"/><stop offset=".582" stop-color="#FF2000"/><stop offset="1" 
stop-color="#FF2000"/></linearGradient><linearGradient id="paint1_linear" x1="6.278" y1="11.466" x2="50.565" y2="11.466" gradientUnits="userSpaceOnUse"><stop stop-color="#FF452A"/><stop offset="1" stop-color="#FF2000"/></linearGradient></defs></svg>
|
||||
|
After Width: | Height: | Size: 4.0 KiB |
|
After Width: | Height: | Size: 6.6 KiB |
@@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" viewBox="0 0 48 48">
|
||||
<rect width="48" height="48" rx="8" fill="#1E40AF"/>
|
||||
<text x="24" y="32" text-anchor="middle" font-family="system-ui,-apple-system,sans-serif" font-size="22" font-weight="700" fill="white">exa</text>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 295 B |
@@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" viewBox="0 0 48 48">
|
||||
<rect width="48" height="48" rx="8" fill="#1E40AF"/>
|
||||
<text x="24" y="32" text-anchor="middle" font-family="system-ui,-apple-system,sans-serif" font-size="22" font-weight="700" fill="white">exa</text>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 295 B |
|
After Width: | Height: | Size: 472 B |
|
After Width: | Height: | Size: 7.0 KiB |
|
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 7.0 KiB |
|
After Width: | Height: | Size: 2.7 KiB |
|
After Width: | Height: | Size: 2.7 KiB |
|
After Width: | Height: | Size: 1.3 KiB |
|
After Width: | Height: | Size: 1.3 KiB |
@@ -14,6 +14,7 @@
|
||||
*
|
||||
* Fixes: https://github.com/diegosouzapw/OmniRoute/issues/129
|
||||
* Fixes: https://github.com/diegosouzapw/OmniRoute/issues/321
|
||||
* Fixes: https://github.com/diegosouzapw/OmniRoute/issues/426
|
||||
*/
|
||||
|
||||
import { existsSync, copyFileSync, mkdirSync } from "node:fs";
|
||||
@@ -80,8 +81,54 @@ if (existsSync(rootBinary)) {
|
||||
}
|
||||
}
|
||||
|
||||
// Strategy 1.5: Use node-pre-gyp to download the correct prebuilt binary
|
||||
// This works on Windows without requiring node-gyp, Python, or MSVC.
|
||||
// better-sqlite3 ships prebuilts for win32-x64, win32-arm64, darwin-x64/arm64.
|
||||
console.log(" 📥 Attempting to download prebuilt binary via node-pre-gyp...");
|
||||
try {
|
||||
const { execSync } = await import("node:child_process");
|
||||
// better-sqlite3 bundles @mapbox/node-pre-gyp — use it directly
|
||||
const preGypBin = join(
|
||||
ROOT,
|
||||
"app",
|
||||
"node_modules",
|
||||
".bin",
|
||||
process.platform === "win32" ? "node-pre-gyp.cmd" : "node-pre-gyp"
|
||||
);
|
||||
const preGypFallback = join(
|
||||
ROOT,
|
||||
"app",
|
||||
"node_modules",
|
||||
"@mapbox",
|
||||
"node-pre-gyp",
|
||||
"bin",
|
||||
"node-pre-gyp"
|
||||
);
|
||||
const preGypCmd = existsSync(preGypBin) ? preGypBin : preGypFallback;
|
||||
|
||||
if (existsSync(preGypCmd)) {
|
||||
execSync(`"${process.execPath}" "${preGypCmd}" install --fallback-to-build=false`, {
|
||||
cwd: join(ROOT, "app", "node_modules", "better-sqlite3"),
|
||||
stdio: "inherit",
|
||||
timeout: 60_000,
|
||||
});
|
||||
mkdirSync(dirname(appBinary), { recursive: true });
|
||||
try {
|
||||
process.dlopen({ exports: {} }, appBinary);
|
||||
console.log(" ✅ Prebuilt binary downloaded and loaded successfully!\n");
|
||||
process.exit(0);
|
||||
} catch (loadErr) {
|
||||
console.warn(` ⚠️ Downloaded binary failed to load: ${loadErr.message}`);
|
||||
}
|
||||
} else {
|
||||
console.warn(" ⚠️ node-pre-gyp not found, skipping prebuilt download.");
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn(` ⚠️ node-pre-gyp download failed: ${err.message.split("\n")[0]}`);
|
||||
}
|
||||
|
||||
// Strategy 2: Fall back to npm rebuild (may work if build tools are available)
|
||||
console.log(" ⚠️ Root binary not available or incompatible, attempting npm rebuild...");
|
||||
console.log(" ⚠️ Attempting npm rebuild (requires build tools)...");
|
||||
|
||||
try {
|
||||
const { execSync } = await import("node:child_process");
|
||||
@@ -103,14 +150,23 @@ try {
|
||||
}
|
||||
}
|
||||
|
||||
// If nothing worked, warn but don't fail the install — let the package stay
|
||||
// installed so users can fix manually or use the pre-flight check in the CLI
|
||||
console.warn(" ⚠️ Could not fix better-sqlite3 native module automatically.");
|
||||
// If nothing worked, warn but don't fail the install
|
||||
console.warn("\n ⚠️ Could not fix better-sqlite3 native module automatically.");
|
||||
console.warn(" The server may not start correctly.");
|
||||
console.warn(" Try manually:");
|
||||
console.warn(` cd ${join(ROOT, "app")} && npm rebuild better-sqlite3`);
|
||||
if (process.platform === "darwin") {
|
||||
console.warn(" Manual fix options:");
|
||||
if (process.platform === "win32") {
|
||||
console.warn(" Option A (easiest — no build tools needed):");
|
||||
console.warn(` cd "${join(ROOT, "app", "node_modules", "better-sqlite3")}"`);
|
||||
console.warn(" npx @mapbox/node-pre-gyp install --fallback-to-build=false");
|
||||
console.warn(" Option B (requires Build Tools for Visual Studio):");
|
||||
console.warn(` cd "${join(ROOT, "app")}" && npm rebuild better-sqlite3`);
|
||||
console.warn(" Install from: https://visualstudio.microsoft.com/visual-cpp-build-tools/");
|
||||
console.warn(" Also ensure Python is installed: https://python.org");
|
||||
} else if (process.platform === "darwin") {
|
||||
console.warn(` cd ${join(ROOT, "app")} && npm rebuild better-sqlite3`);
|
||||
console.warn(" If build tools are missing: xcode-select --install");
|
||||
} else {
|
||||
console.warn(` cd ${join(ROOT, "app")} && npm rebuild better-sqlite3`);
|
||||
}
|
||||
console.warn("");
|
||||
|
||||
|
||||
@@ -142,6 +142,62 @@ if (sanitisedCount > 0) {
|
||||
console.log(" ℹ️ No hardcoded paths found to sanitise");
|
||||
}
|
||||
|
||||
// ── Step 5.6: Strip Turbopack hashed externals from compiled chunks ─────────
// Some compiled chunks may reference require('package-<16hexchars>') rather
// than require('package'). Those hashed names are not present in node_modules
// and fail with MODULE_NOT_FOUND at runtime, so every .js file below
// app/.next/server/ is rewritten to use the plain package name.
{
  const serverOutput = join(APP_DIR, ".next", "server");
  // Quoted token that ends in "-" followed by exactly 16 lowercase hex characters.
  const HASH_RE = /(['"\\])([a-z@][a-z0-9@./_-]+-[0-9a-f]{16})\1/g;
  const totals = { files: 0, matches: 0 };

  // Rewrites one file in place; the file is only written when something changed.
  const stripHashesInFile = (filePath) => {
    const original = readFileSync(filePath, "utf8");
    let hits = 0;
    const rewritten = original.replace(HASH_RE, (_match, quote, hashedName) => {
      const plainName = hashedName.replace(/-[0-9a-f]{16}$/, "");
      hits += 1;
      return `${quote}${plainName}${quote}`;
    });
    if (hits === 0) return;
    writeFileSync(filePath, rewritten);
    totals.files += 1;
    totals.matches += hits;
  };

  // Depth-first traversal; unreadable directories and files are skipped silently.
  const visit = (dir) => {
    let names = [];
    try {
      names = readdirSync(dir);
    } catch {
      return;
    }
    names.forEach((name) => {
      const target = join(dir, name);
      try {
        if (statSync(target).isDirectory()) {
          visit(target);
        } else if (name.endsWith(".js")) {
          stripHashesInFile(target);
        }
      } catch {
        /* skip unreadable files */
      }
    });
  };

  if (existsSync(serverOutput)) {
    visit(serverOutput);
    const summary =
      totals.matches > 0
        ? ` 🔧 Hash-strip: patched ${totals.matches} hashed require() in ${totals.files} server chunk file(s)`
        : " ✅ Hash-strip: no hashed externals found in compiled chunks.";
    console.log(summary);
  }
}
|
||||
|
||||
// ── Step 6: Copy static assets ─────────────────────────────
|
||||
const staticSrc = join(ROOT, ".next", "static");
|
||||
const staticDest = join(APP_DIR, ".next", "static");
|
||||
@@ -222,6 +278,19 @@ if (existsSync(swcHelpersSrc) && !existsSync(swcHelpersDst)) {
|
||||
console.log(" ✅ @swc/helpers included in standalone build.");
|
||||
}
|
||||
|
||||
// ── Step 10.6: Remove large binaries from standalone build ──
// The listed directories hold platform-native binaries (.node, .asar) that
// trigger Z_DATA_ERROR during npm pack and are not needed in the npm package.
const binaryDirsToRemove = ["vscode-extension", "electron"];
binaryDirsToRemove.forEach((dirName) => {
  const doomedPath = join(APP_DIR, dirName);
  // Nothing to do when the directory was never copied into the build.
  if (!existsSync(doomedPath)) return;
  console.log(` 🧹 Removing app/${dirName}/ (not needed in npm package)...`);
  rmSync(doomedPath, { recursive: true, force: true });
  console.log(` ✅ app/${dirName}/ removed.`);
});
|
||||
|
||||
// ── Done ───────────────────────────────────────────────────
|
||||
const appPkg = join(APP_DIR, "package.json");
|
||||
if (existsSync(appPkg)) {
|
||||
|
||||
@@ -33,11 +33,29 @@ export default function APIPageClient({ machineId }) {
|
||||
const [viewTab, setViewTab] = useState("api");
|
||||
const [mcpStatus, setMcpStatus] = useState<any>(null);
|
||||
const [a2aStatus, setA2aStatus] = useState<any>(null);
|
||||
const [searchProviders, setSearchProviders] = useState<any[]>([]);
|
||||
|
||||
const { copied, copy } = useCopyToClipboard();
|
||||
|
||||
// Loads the provider list exposed by GET /v1/search into component state.
// Failures are tolerated on purpose: the state simply keeps its current value.
const fetchSearchProviders = async () => {
  try {
    const res = await fetch("/v1/search");
    if (!res.ok) return;
    const data = await res.json();
    // The state is consumed with .length/.map, so anything that is not a real
    // array (object, string, missing field) is normalised to an empty list.
    setSearchProviders(Array.isArray(data?.data) ? data.data : []);
  } catch {
    // Search endpoint may not be available
  }
};
|
||||
|
||||
useEffect(() => {
|
||||
Promise.allSettled([loadCloudSettings(), fetchModels(), fetchProtocolStatus()]).finally(() => {
|
||||
Promise.allSettled([
|
||||
loadCloudSettings(),
|
||||
fetchModels(),
|
||||
fetchProtocolStatus(),
|
||||
fetchSearchProviders(),
|
||||
]).finally(() => {
|
||||
setLoading(false);
|
||||
});
|
||||
}, []);
|
||||
@@ -575,6 +593,47 @@ export default function APIPageClient({ machineId }) {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Search & Discovery */}
|
||||
{searchProviders.length > 0 && (
|
||||
<div className="mb-6">
|
||||
<div className="flex items-center gap-2 mb-3">
|
||||
<span className="material-symbols-outlined text-sm text-cyan-400">
|
||||
travel_explore
|
||||
</span>
|
||||
<h3 className="text-xs font-semibold text-text-muted uppercase tracking-wider">
|
||||
{t("categorySearch") || "Search & Discovery"}
|
||||
</h3>
|
||||
<div className="flex-1 h-px bg-border/50" />
|
||||
</div>
|
||||
<div className="flex flex-col gap-3">
|
||||
<EndpointSection
|
||||
icon="search"
|
||||
iconColor="text-cyan-500"
|
||||
iconBg="bg-cyan-500/10"
|
||||
title={t("webSearch") || "Web Search"}
|
||||
path="/v1/search"
|
||||
description={
|
||||
t("webSearchDesc") ||
|
||||
"Unified web search across multiple providers with automatic failover and caching"
|
||||
}
|
||||
models={searchProviders.map((p) => ({
|
||||
id: p.id,
|
||||
name: p.name,
|
||||
owned_by: p.id,
|
||||
type: "search",
|
||||
}))}
|
||||
expanded={expandedEndpoint === "search"}
|
||||
onToggle={() =>
|
||||
setExpandedEndpoint(expandedEndpoint === "search" ? null : "search")
|
||||
}
|
||||
copy={copy}
|
||||
copied={copied}
|
||||
baseUrl={currentEndpoint}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Utility & Management */}
|
||||
<div>
|
||||
<div className="flex items-center gap-2 mb-3">
|
||||
|
||||
@@ -81,29 +81,36 @@ const PROVIDER_MODELS: Record<
|
||||
{ id: "openai/dall-e-2", name: "DALL-E 2" },
|
||||
],
|
||||
},
|
||||
{ id: "xai", name: "xAI (Grok)", models: [{ id: "xai/grok-2-image", name: "Grok 2 Image" }] },
|
||||
{
|
||||
id: "xai",
|
||||
name: "xAI (Grok)",
|
||||
models: [{ id: "xai/grok-2-image-1212", name: "Grok 2 Image" }],
|
||||
},
|
||||
{
|
||||
id: "together",
|
||||
name: "Together AI",
|
||||
models: [
|
||||
{ id: "together/stable-diffusion-xl", name: "SDXL" },
|
||||
{ id: "together/FLUX.1-schnell-Free", name: "FLUX.1 Schnell" },
|
||||
{ id: "together/stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL" },
|
||||
{ id: "together/black-forest-labs/FLUX.1-schnell-Free", name: "FLUX.1 Schnell" },
|
||||
],
|
||||
},
|
||||
{
|
||||
id: "fireworks",
|
||||
name: "Fireworks AI",
|
||||
models: [
|
||||
{ id: "fireworks/stable-diffusion-xl-1024-v1-0", name: "SDXL 1024" },
|
||||
{ id: "fireworks/flux-1-dev-fp8", name: "FLUX.1 Dev" },
|
||||
{
|
||||
id: "fireworks/accounts/fireworks/models/stable-diffusion-xl-1024-v1-0",
|
||||
name: "SDXL 1024",
|
||||
},
|
||||
{ id: "fireworks/accounts/fireworks/models/flux-1-dev-fp8", name: "FLUX.1 Dev" },
|
||||
],
|
||||
},
|
||||
{
|
||||
id: "nebius",
|
||||
name: "Nebius AI",
|
||||
models: [
|
||||
{ id: "nebius/flux-dev", name: "FLUX Dev" },
|
||||
{ id: "nebius/sdxl", name: "SDXL" },
|
||||
{ id: "nebius/black-forest-labs/flux-dev", name: "FLUX Dev" },
|
||||
{ id: "nebius/black-forest-labs/flux-schnell", name: "FLUX Schnell" },
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -117,7 +124,10 @@ const PROVIDER_MODELS: Record<
|
||||
{
|
||||
id: "nanobanana",
|
||||
name: "NanoBanana",
|
||||
models: [{ id: "nanobanana/flux-schnell", name: "FLUX Schnell" }],
|
||||
models: [
|
||||
{ id: "nanobanana/nanobanana-flash", name: "NanoBanana Flash" },
|
||||
{ id: "nanobanana/nanobanana-pro", name: "NanoBanana Pro" },
|
||||
],
|
||||
},
|
||||
{
|
||||
id: "sdwebui",
|
||||
|
||||
@@ -101,6 +101,7 @@ export default function ProviderDetailPage() {
|
||||
const isOpenAICompatible = isOpenAICompatibleProvider(providerId);
|
||||
const isAnthropicCompatible = isAnthropicCompatibleProvider(providerId);
|
||||
const isCompatible = isOpenAICompatible || isAnthropicCompatible;
|
||||
const isSearchProvider = providerId.endsWith("-search");
|
||||
|
||||
const providerStorageAlias = isCompatible ? providerId : providerAlias;
|
||||
const providerDisplayAlias = isCompatible ? providerNode?.prefix || providerId : providerAlias;
|
||||
@@ -1060,21 +1061,43 @@ export default function ProviderDetailPage() {
|
||||
)}
|
||||
</Card>
|
||||
|
||||
{/* Models */}
|
||||
<Card>
|
||||
<h2 className="text-lg font-semibold mb-4">{t("availableModels")}</h2>
|
||||
{renderModelsSection()}
|
||||
{/* Models — hidden for search providers (they don't have models) */}
|
||||
{!isSearchProvider && (
|
||||
<Card>
|
||||
<h2 className="text-lg font-semibold mb-4">{t("availableModels")}</h2>
|
||||
{renderModelsSection()}
|
||||
|
||||
{/* Custom Models — available for ALL providers */}
|
||||
{!isCompatible && (
|
||||
<CustomModelsSection
|
||||
providerId={providerId}
|
||||
providerAlias={providerDisplayAlias}
|
||||
copied={copied}
|
||||
onCopy={copy}
|
||||
/>
|
||||
)}
|
||||
</Card>
|
||||
{/* Custom Models — available for non-compatible, non-search providers */}
|
||||
{!isCompatible && (
|
||||
<CustomModelsSection
|
||||
providerId={providerId}
|
||||
providerAlias={providerDisplayAlias}
|
||||
copied={copied}
|
||||
onCopy={copy}
|
||||
/>
|
||||
)}
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Search provider info */}
|
||||
{isSearchProvider && (
|
||||
<Card>
|
||||
<h2 className="text-lg font-semibold mb-4">{t("searchProvider") || "Search Provider"}</h2>
|
||||
<p className="text-sm text-text-muted">
|
||||
{t("searchProviderDesc") ||
|
||||
"This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."}
|
||||
</p>
|
||||
{providerId === "perplexity-search" && (
|
||||
<div className="mt-3 flex items-center gap-2 px-3 py-2 rounded-lg bg-blue-500/10 border border-blue-500/20">
|
||||
<span className="material-symbols-outlined text-sm text-blue-400">link</span>
|
||||
<p className="text-xs text-blue-300">
|
||||
Uses the same API key as <strong>Perplexity</strong> (chat provider). If you already
|
||||
have Perplexity configured, no additional setup is needed.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Modals */}
|
||||
{providerId === "kiro" ? (
|
||||
|
||||
@@ -0,0 +1,614 @@
|
||||
"use client";
|
||||
|
||||
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||
import { Button, Card, Modal } from "@/shared/components";
|
||||
|
||||
type ProxyItem = {
|
||||
id: string;
|
||||
name: string;
|
||||
type: string;
|
||||
host: string;
|
||||
port: number;
|
||||
region?: string | null;
|
||||
notes?: string | null;
|
||||
status?: string;
|
||||
};
|
||||
|
||||
type UsageInfo = {
|
||||
count: number;
|
||||
assignments: Array<{ scope: string; scopeId: string | null }>;
|
||||
};
|
||||
|
||||
type HealthInfo = {
|
||||
proxyId: string;
|
||||
totalRequests: number;
|
||||
successRate: number | null;
|
||||
avgLatencyMs: number | null;
|
||||
lastSeenAt: string | null;
|
||||
};
|
||||
|
||||
const EMPTY_FORM = {
|
||||
id: "",
|
||||
name: "",
|
||||
type: "http",
|
||||
host: "",
|
||||
port: "8080",
|
||||
username: "",
|
||||
password: "",
|
||||
region: "",
|
||||
notes: "",
|
||||
status: "active",
|
||||
};
|
||||
|
||||
export default function ProxyRegistryManager() {
|
||||
const [items, setItems] = useState<ProxyItem[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const [modalOpen, setModalOpen] = useState(false);
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [form, setForm] = useState(EMPTY_FORM);
|
||||
|
||||
const [usageById, setUsageById] = useState<Record<string, UsageInfo>>({});
|
||||
const [healthById, setHealthById] = useState<Record<string, HealthInfo>>({});
|
||||
const [migrating, setMigrating] = useState(false);
|
||||
const [bulkOpen, setBulkOpen] = useState(false);
|
||||
const [bulkSaving, setBulkSaving] = useState(false);
|
||||
const [bulkScope, setBulkScope] = useState("provider");
|
||||
const [bulkScopeIds, setBulkScopeIds] = useState("");
|
||||
const [bulkProxyId, setBulkProxyId] = useState("");
|
||||
|
||||
const editingId = useMemo(() => form.id || "", [form.id]);
|
||||
|
||||
// Fetches the last 24 hours of proxy health data and indexes it by proxy id.
// Health is auxiliary information, so any failure leaves the state untouched.
const loadHealth = useCallback(async () => {
  try {
    const response = await fetch("/api/settings/proxies/health?hours=24");
    const body = await response.json().catch(() => ({}));
    if (!response.ok) {
      return;
    }
    const rows: HealthInfo[] = Array.isArray(body?.items) ? body.items : [];
    const pairs = rows.map((row) => [row.proxyId, row]);
    setHealthById(Object.fromEntries(pairs) as Record<string, HealthInfo>);
  } catch {
    // ignore health loading errors in UI
  }
}, []);
|
||||
|
||||
// Reloads the proxy list. On success it also starts a health refresh without
// waiting for it; on any failure the list is emptied and the error is shown.
const load = useCallback(async () => {
  const fallbackMessage = "Failed to load proxy registry";
  setLoading(true);
  setError(null);
  try {
    const response = await fetch("/api/settings/proxies");
    const body = await response.json().catch(() => ({}));
    if (response.ok) {
      setItems(Array.isArray(body?.items) ? body.items : []);
      // Fire-and-forget: loadHealth swallows its own errors.
      void loadHealth();
    } else {
      setError(body?.error?.message || fallbackMessage);
      setItems([]);
    }
  } catch (e: any) {
    setError(e?.message || fallbackMessage);
    setItems([]);
  } finally {
    setLoading(false);
  }
}, [loadHealth]);
|
||||
|
||||
useEffect(() => {
|
||||
void load();
|
||||
}, [load]);
|
||||
|
||||
// Keeps the bulk-assign proxy selection valid whenever the proxy list changes:
// a selection that still exists is kept, otherwise the first proxy (or "" when
// the list is empty) becomes the default.
// The effect deliberately does not depend on bulkProxyId. Reacting to the
// selection itself would immediately overwrite an explicit "" choice, making
// the "(clear assignment)" option impossible to select.
useEffect(() => {
  setBulkProxyId((current) => {
    if (items.some((item) => item.id === current)) return current;
    return items[0]?.id ?? "";
  });
}, [items]);
|
||||
|
||||
const openCreate = () => {
|
||||
setForm(EMPTY_FORM);
|
||||
setModalOpen(true);
|
||||
};
|
||||
|
||||
// Opens the editor modal pre-filled from an existing proxy. Username and
// password always start empty; handleSave only sends them when they are filled.
const openEdit = (item: ProxyItem) => {
  const { id, name, type, host, port, region, notes, status } = item;
  const prefilled = {
    id,
    name: name || "",
    type: type || "http",
    host: host || "",
    port: String(port || 8080),
    username: "",
    password: "",
    region: region || "",
    notes: notes || "",
    status: status || "active",
  };
  setForm(prefilled);
  setModalOpen(true);
};
|
||||
|
||||
// Fetches where a single proxy is assigned and stores the result under its id.
// Errors and non-OK responses are ignored so the table keeps showing "-".
const loadUsage = async (proxyId: string) => {
  try {
    const url = `/api/settings/proxies?id=${encodeURIComponent(proxyId)}&whereUsed=1`;
    const response = await fetch(url);
    const body = await response.json().catch(() => ({}));
    if (!response.ok) {
      return;
    }
    const usage: UsageInfo = {
      count: Number(body?.count || 0),
      assignments: Array.isArray(body?.assignments) ? body.assignments : [],
    };
    setUsageById((previous) => ({ ...previous, [proxyId]: usage }));
  } catch {
    // ignore usage loading errors in UI
  }
};
|
||||
|
||||
// Validates the form and creates (POST) or updates (PATCH) a proxy.
// On success the modal closes, the form resets and the list is reloaded;
// on failure the error message is stored and the modal stays open.
const handleSave = async () => {
  if (!form.name.trim() || !form.host.trim()) {
    setError("Name and host are required");
    return;
  }

  // An empty port keeps the historical 8080 default. Anything else must be a
  // valid TCP port; otherwise Number() would yield NaN, which JSON.stringify
  // silently turns into null.
  const portText = String(form.port ?? "").trim();
  const port = portText === "" ? 8080 : Number(portText);
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
    setError("Port must be an integer between 1 and 65535");
    return;
  }

  setSaving(true);
  setError(null);

  const normalizedUsername = form.username.trim();
  const normalizedPassword = form.password.trim();

  const payload: Record<string, unknown> = {
    ...(editingId ? { id: editingId } : {}),
    name: form.name.trim(),
    type: form.type,
    host: form.host.trim(),
    port,
    region: form.region.trim() || null,
    notes: form.notes.trim() || null,
    status: form.status,
  };
  // When editing, blank credentials mean "keep the stored value", so they are
  // only sent if filled in. When creating, they are always sent.
  if (!editingId || normalizedUsername.length > 0) {
    payload.username = normalizedUsername;
  }
  if (!editingId || normalizedPassword.length > 0) {
    payload.password = normalizedPassword;
  }

  try {
    const res = await fetch("/api/settings/proxies", {
      method: editingId ? "PATCH" : "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
    const data = await res.json().catch(() => ({}));
    if (!res.ok) {
      setError(data?.error?.message || "Failed to save proxy");
      return;
    }

    setModalOpen(false);
    setForm(EMPTY_FORM);
    await load();
  } catch (e: any) {
    setError(e?.message || "Failed to save proxy");
  } finally {
    setSaving(false);
  }
};
|
||||
|
||||
// Deletes a proxy. A 409 response means the proxy is still assigned; the user
// is then asked whether to retry with force=1, which also removes assignments.
const handleDelete = async (id: string) => {
  try {
    const endpoint = `/api/settings/proxies?id=${encodeURIComponent(id)}`;
    const firstAttempt = await fetch(endpoint, { method: "DELETE" });
    if (firstAttempt.ok) {
      await load();
      return;
    }

    const failure = await firstAttempt.json().catch(() => ({}));
    if (firstAttempt.status !== 409) {
      setError(failure?.error?.message || "Failed to delete proxy");
      return;
    }

    const confirmed = window.confirm(
      "This proxy is still assigned. Force delete and remove all assignments?"
    );
    if (!confirmed) {
      return;
    }

    const forcedAttempt = await fetch(`${endpoint}&force=1`, { method: "DELETE" });
    if (forcedAttempt.ok) {
      await load();
      return;
    }
    const forcedFailure = await forcedAttempt.json().catch(() => ({}));
    setError(forcedFailure?.error?.message || "Failed to force delete proxy");
  } catch (e: any) {
    setError(e?.message || "Failed to delete proxy");
  }
};
|
||||
|
||||
// Asks the server to import the legacy proxy configuration (non-forced) and
// reloads the registry afterwards. The button spinner follows `migrating`.
const handleMigrate = async () => {
  const fallbackMessage = "Failed to migrate legacy proxy config";
  setMigrating(true);
  setError(null);
  try {
    const response = await fetch("/api/settings/proxies/migrate", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ force: false }),
    });
    const body = await response.json().catch(() => ({}));
    if (response.ok) {
      await load();
    } else {
      setError(body?.error?.message || fallbackMessage);
    }
  } catch (e: any) {
    setError(e?.message || fallbackMessage);
  } finally {
    setMigrating(false);
  }
};
|
||||
|
||||
// Applies the selected proxy (or clears it when none is selected) to every
// scope id entered in the bulk dialog. The "global" scope sends no ids.
const handleBulkAssign = async () => {
  const fallbackMessage = "Failed to run bulk assignment";
  setBulkSaving(true);
  setError(null);
  try {
    let scopeIds: string[] = [];
    if (bulkScope !== "global") {
      // Ids may be separated by commas or newlines; blank entries are dropped.
      scopeIds = bulkScopeIds
        .split(/[\n,]/g)
        .map((part) => part.trim())
        .filter(Boolean);
    }

    const requestBody = {
      scope: bulkScope,
      scopeIds,
      proxyId: bulkProxyId || null,
    };
    const response = await fetch("/api/settings/proxies/bulk-assign", {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(requestBody),
    });
    const result = await response.json().catch(() => ({}));
    if (!response.ok) {
      setError(result?.error?.message || fallbackMessage);
      return;
    }

    setBulkOpen(false);
    setBulkScopeIds("");
    await load();
  } catch (e: any) {
    setError(e?.message || fallbackMessage);
  } finally {
    setBulkSaving(false);
  }
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<Card className="p-6">
|
||||
<div className="flex items-center justify-between gap-3 mb-4">
|
||||
<div>
|
||||
<h3 className="text-lg font-semibold">Proxy Registry</h3>
|
||||
<p className="text-sm text-text-muted">Store reusable proxies and track assignments.</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<Button
|
||||
size="sm"
|
||||
variant="secondary"
|
||||
icon="upgrade"
|
||||
onClick={handleMigrate}
|
||||
loading={migrating}
|
||||
data-testid="proxy-registry-import-legacy"
|
||||
>
|
||||
Import Legacy
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="secondary"
|
||||
icon="account_tree"
|
||||
onClick={() => setBulkOpen(true)}
|
||||
data-testid="proxy-registry-open-bulk"
|
||||
>
|
||||
Bulk Assign
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
icon="add"
|
||||
onClick={openCreate}
|
||||
data-testid="proxy-registry-open-create"
|
||||
>
|
||||
Add Proxy
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="mb-3 px-3 py-2 rounded border border-red-500/30 bg-red-500/10 text-sm text-red-400">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{loading ? (
|
||||
<div className="text-sm text-text-muted">Loading proxies...</div>
|
||||
) : items.length === 0 ? (
|
||||
<div className="text-sm text-text-muted">No saved proxies yet.</div>
|
||||
) : (
|
||||
<div className="overflow-x-auto">
|
||||
<table className="w-full text-sm">
|
||||
<thead>
|
||||
<tr className="text-left text-text-muted border-b border-border">
|
||||
<th className="py-2 pr-3">Name</th>
|
||||
<th className="py-2 pr-3">Endpoint</th>
|
||||
<th className="py-2 pr-3">Status</th>
|
||||
<th className="py-2 pr-3">Health (24h)</th>
|
||||
<th className="py-2 pr-3">Usage</th>
|
||||
<th className="py-2">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{items.map((item) => {
|
||||
const usage = usageById[item.id];
|
||||
const health = healthById[item.id];
|
||||
return (
|
||||
<tr key={item.id} className="border-b border-border/60">
|
||||
<td className="py-2 pr-3">
|
||||
<div className="font-medium text-text-main">{item.name}</div>
|
||||
{item.region && (
|
||||
<div className="text-xs text-text-muted">{item.region}</div>
|
||||
)}
|
||||
</td>
|
||||
<td className="py-2 pr-3 font-mono text-xs text-text-muted">
|
||||
{item.type}://{item.host}:{item.port}
|
||||
</td>
|
||||
<td className="py-2 pr-3">
|
||||
<span className="text-xs px-2 py-1 rounded border border-border bg-bg-subtle">
|
||||
{item.status || "active"}
|
||||
</span>
|
||||
</td>
|
||||
<td className="py-2 pr-3 text-xs text-text-muted">
|
||||
{health ? (
|
||||
<div className="flex flex-col gap-0.5">
|
||||
<span>{health.successRate ?? 0}% success</span>
|
||||
<span>{health.avgLatencyMs ?? "-"} ms avg</span>
|
||||
</div>
|
||||
) : (
|
||||
"-"
|
||||
)}
|
||||
</td>
|
||||
<td className="py-2 pr-3 text-xs text-text-muted">
|
||||
{usage ? `${usage.count} assignment(s)` : "-"}
|
||||
</td>
|
||||
<td className="py-2">
|
||||
<div className="flex items-center gap-1">
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
icon="visibility"
|
||||
onClick={() => void loadUsage(item.id)}
|
||||
>
|
||||
Usage
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
icon="edit"
|
||||
onClick={() => openEdit(item)}
|
||||
>
|
||||
Edit
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
icon="delete"
|
||||
onClick={() => void handleDelete(item.id)}
|
||||
className="!text-red-400"
|
||||
>
|
||||
Delete
|
||||
</Button>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
)}
|
||||
</Card>
|
||||
|
||||
<Modal
|
||||
isOpen={modalOpen}
|
||||
onClose={() => {
|
||||
if (!saving) setModalOpen(false);
|
||||
}}
|
||||
title={editingId ? "Edit Proxy" : "Create Proxy"}
|
||||
maxWidth="lg"
|
||||
>
|
||||
<div className="flex flex-col gap-3">
|
||||
<div className="grid grid-cols-2 gap-3">
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Name</label>
|
||||
<input
|
||||
data-testid="proxy-registry-name-input"
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.name}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, name: e.target.value }))}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Type</label>
|
||||
<select
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.type}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, type: e.target.value }))}
|
||||
>
|
||||
<option value="http">HTTP</option>
|
||||
<option value="https">HTTPS</option>
|
||||
<option value="socks5">SOCKS5</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Host</label>
|
||||
<input
|
||||
data-testid="proxy-registry-host-input"
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.host}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, host: e.target.value }))}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Port</label>
|
||||
<input
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.port}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, port: e.target.value }))}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Username</label>
|
||||
<input
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.username}
|
||||
placeholder={editingId ? "Leave blank to keep current username" : ""}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, username: e.target.value }))}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Password</label>
|
||||
<input
|
||||
type="password"
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.password}
|
||||
placeholder={editingId ? "Leave blank to keep current password" : ""}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, password: e.target.value }))}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Region</label>
|
||||
<input
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.region}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, region: e.target.value }))}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Status</label>
|
||||
<select
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.status}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, status: e.target.value }))}
|
||||
>
|
||||
<option value="active">active</option>
|
||||
<option value="inactive">inactive</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Notes</label>
|
||||
<textarea
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={form.notes}
|
||||
onChange={(e) => setForm((prev) => ({ ...prev, notes: e.target.value }))}
|
||||
rows={3}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-end gap-2 pt-2 border-t border-border">
|
||||
<Button size="sm" variant="secondary" onClick={() => setModalOpen(false)}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button size="sm" icon="save" onClick={handleSave} loading={saving}>
|
||||
Save
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</Modal>
|
||||
|
||||
<Modal
|
||||
isOpen={bulkOpen}
|
||||
onClose={() => {
|
||||
if (!bulkSaving) setBulkOpen(false);
|
||||
}}
|
||||
title="Bulk Proxy Assignment"
|
||||
maxWidth="lg"
|
||||
>
|
||||
<div className="flex flex-col gap-3">
|
||||
<div className="grid grid-cols-2 gap-3">
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Scope</label>
|
||||
<select
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={bulkScope}
|
||||
onChange={(e) => setBulkScope(e.target.value)}
|
||||
>
|
||||
<option value="global">global</option>
|
||||
<option value="provider">provider</option>
|
||||
<option value="account">account</option>
|
||||
<option value="combo">combo</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">Proxy</label>
|
||||
<select
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
value={bulkProxyId}
|
||||
onChange={(e) => setBulkProxyId(e.target.value)}
|
||||
>
|
||||
<option value="">(clear assignment)</option>
|
||||
{items.map((item) => (
|
||||
<option key={item.id} value={item.id}>
|
||||
{item.name} ({item.type}://{item.host}:{item.port})
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{bulkScope !== "global" && (
|
||||
<div>
|
||||
<label className="text-xs text-text-muted mb-1 block">
|
||||
Scope IDs (comma or newline)
|
||||
</label>
|
||||
<textarea
|
||||
data-testid="proxy-registry-bulk-scopeids-input"
|
||||
className="w-full px-3 py-2 rounded bg-bg-subtle border border-border"
|
||||
rows={5}
|
||||
value={bulkScopeIds}
|
||||
onChange={(e) => setBulkScopeIds(e.target.value)}
|
||||
placeholder="provider-openai,provider-anthropic"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex items-center justify-end gap-2 pt-2 border-t border-border">
|
||||
<Button size="sm" variant="secondary" onClick={() => setBulkOpen(false)}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
icon="done_all"
|
||||
onClick={handleBulkAssign}
|
||||
loading={bulkSaving}
|
||||
data-testid="proxy-registry-bulk-apply"
|
||||
>
|
||||
Apply
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</Modal>
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -3,6 +3,7 @@
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { Card, Button, ProxyConfigModal } from "@/shared/components";
|
||||
import { useTranslations } from "next-intl";
|
||||
import ProxyRegistryManager from "./ProxyRegistryManager";
|
||||
|
||||
export default function ProxyTab() {
|
||||
const [proxyModalOpen, setProxyModalOpen] = useState(false);
|
||||
@@ -41,39 +42,43 @@ export default function ProxyTab() {
|
||||
|
||||
return (
|
||||
<>
|
||||
<Card className="p-0 overflow-hidden">
|
||||
<div className="p-6">
|
||||
<div className="flex items-center gap-2 mb-4">
|
||||
<span className="material-symbols-outlined text-xl text-primary" aria-hidden="true">
|
||||
vpn_lock
|
||||
</span>
|
||||
<h2 className="text-lg font-bold">{t("globalProxy")}</h2>
|
||||
<div className="flex flex-col gap-6">
|
||||
<Card className="p-0 overflow-hidden">
|
||||
<div className="p-6">
|
||||
<div className="flex items-center gap-2 mb-4">
|
||||
<span className="material-symbols-outlined text-xl text-primary" aria-hidden="true">
|
||||
vpn_lock
|
||||
</span>
|
||||
<h2 className="text-lg font-bold">{t("globalProxy")}</h2>
|
||||
</div>
|
||||
<p className="text-sm text-text-muted mb-4">{t("globalProxyDesc")}</p>
|
||||
<div className="flex items-center gap-3">
|
||||
{globalProxy ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="px-2.5 py-1 rounded text-xs font-bold uppercase bg-emerald-500/15 text-emerald-400 border border-emerald-500/30">
|
||||
{globalProxy.type}://{globalProxy.host}:{globalProxy.port}
|
||||
</span>
|
||||
</div>
|
||||
) : (
|
||||
<span className="text-sm text-text-muted">{t("noGlobalProxy")}</span>
|
||||
)}
|
||||
<Button
|
||||
size="sm"
|
||||
variant={globalProxy ? "secondary" : "primary"}
|
||||
icon="settings"
|
||||
onClick={() => {
|
||||
loadGlobalProxy();
|
||||
setProxyModalOpen(true);
|
||||
}}
|
||||
>
|
||||
{globalProxy ? tc("edit") : t("configure")}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-sm text-text-muted mb-4">{t("globalProxyDesc")}</p>
|
||||
<div className="flex items-center gap-3">
|
||||
{globalProxy ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="px-2.5 py-1 rounded text-xs font-bold uppercase bg-emerald-500/15 text-emerald-400 border border-emerald-500/30">
|
||||
{globalProxy.type}://{globalProxy.host}:{globalProxy.port}
|
||||
</span>
|
||||
</div>
|
||||
) : (
|
||||
<span className="text-sm text-text-muted">{t("noGlobalProxy")}</span>
|
||||
)}
|
||||
<Button
|
||||
size="sm"
|
||||
variant={globalProxy ? "secondary" : "primary"}
|
||||
icon="settings"
|
||||
onClick={() => {
|
||||
loadGlobalProxy();
|
||||
setProxyModalOpen(true);
|
||||
}}
|
||||
>
|
||||
{globalProxy ? tc("edit") : t("configure")}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
</Card>
|
||||
|
||||
<ProxyRegistryManager />
|
||||
</div>
|
||||
|
||||
<ProxyConfigModal
|
||||
isOpen={proxyModalOpen}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "path";
|
||||
import fs from "fs";
|
||||
import os from "os";
|
||||
import { getDbInstance, SQLITE_FILE } from "@/lib/db/core";
|
||||
import { isAuthRequired, isAuthenticated } from "@/shared/utils/apiAuth";
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import { getDbInstance, SQLITE_FILE } from "@/lib/db/core";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import os from "node:os";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import os from "os";
|
||||
|
||||
/**
|
||||
* GET /api/db-backups/exportAll
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import Database from "better-sqlite3";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "path";
|
||||
import fs from "fs";
|
||||
import os from "os";
|
||||
import { getDbInstance, resetDbInstance, SQLITE_FILE } from "@/lib/db/core";
|
||||
import { backupDbFile } from "@/lib/db/backup";
|
||||
import { isAuthRequired, isAuthenticated } from "@/shared/utils/apiAuth";
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* GET /api/logs/detail — List detailed request logs
|
||||
* GET /api/logs/detail/:id — Get specific detailed log
|
||||
* POST /api/logs/detail/toggle — Enable/disable detailed logging
|
||||
*/
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { isAuthenticated } from "@/shared/utils/apiAuth";
|
||||
import {
|
||||
getRequestDetailLogs,
|
||||
getRequestDetailLogCount,
|
||||
isDetailedLoggingEnabled,
|
||||
} from "@/lib/db/detailedLogs";
|
||||
import { updateSettings } from "@/lib/db/settings";
|
||||
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
/**
 * GET handler: returns one page of detailed request logs.
 *
 * Query parameters:
 *   - `limit`  — page size; non-negative integer, capped at 200, defaults to 50.
 *   - `offset` — number of rows to skip; non-negative integer, defaults to 0.
 *
 * Responds with `{ enabled, total, logs }`, or a 401 JSON error when the
 * request is not authenticated.
 */
export async function GET(req: NextRequest) {
  // `await` is a no-op when the helper is synchronous. If it returns a Promise,
  // the un-awaited value would always be truthy and this guard would never fire.
  // NOTE(review): confirm the signature of isAuthenticated in apiAuth.
  if (!(await isAuthenticated(req))) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  const { searchParams } = new URL(req.url);

  // Parse as base-10 integers and reject NaN / negative values. A negative
  // limit would slip under the Math.min cap, and SQLite treats a negative
  // LIMIT as "no upper bound".
  const requestedLimit = Number.parseInt(searchParams.get("limit") ?? "", 10);
  const limit =
    Number.isInteger(requestedLimit) && requestedLimit >= 0 ? Math.min(requestedLimit, 200) : 50;

  const requestedOffset = Number.parseInt(searchParams.get("offset") ?? "", 10);
  const offset = Number.isInteger(requestedOffset) && requestedOffset >= 0 ? requestedOffset : 0;

  const logs = getRequestDetailLogs(limit, offset);
  const total = getRequestDetailLogCount();
  const enabled = await isDetailedLoggingEnabled();

  return NextResponse.json({ enabled, total, logs });
}
|
||||
|
||||
/**
 * POST handler: turns detailed request logging on or off.
 *
 * Expects a JSON body with an `enabled` field; only `true` or the string "1"
 * enable logging, any other value disables it. The flag is persisted through
 * `updateSettings` under the `detailed_logs_enabled` key.
 *
 * Responds with `{ success, enabled, message }`, a 401 JSON error when the
 * request is not authenticated, or a 400 JSON error when the body is not
 * valid JSON.
 */
export async function POST(req: NextRequest) {
  // `await` is a no-op when the helper is synchronous. If it returns a Promise,
  // the un-awaited value would always be truthy and this guard would never fire.
  // NOTE(review): confirm the signature of isAuthenticated in apiAuth.
  if (!(await isAuthenticated(req))) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  // An empty or malformed body makes req.json() reject; report that as a
  // client error instead of letting it surface as an unhandled exception.
  let payload;
  try {
    payload = await req.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
  }

  // Optional chaining guards against a literal JSON `null` body.
  const flag = payload?.enabled;
  const enabled = flag === true || flag === "1";

  await updateSettings({ detailed_logs_enabled: enabled });

  const message = enabled
    ? "Detailed logging enabled. Pipeline bodies will be captured for new requests."
    : "Detailed logging disabled.";

  return NextResponse.json({ success: true, enabled, message });
}
|
||||
@@ -13,11 +13,13 @@ export async function GET() {
|
||||
const { getAllCircuitBreakerStatuses } = await import("@/shared/utils/circuitBreaker");
|
||||
const { getAllRateLimitStatus } = await import("@omniroute/open-sse/services/rateLimitManager");
|
||||
const { getAllModelLockouts } = await import("@omniroute/open-sse/services/accountFallback");
|
||||
const { getInflightCount } = await import("@omniroute/open-sse/services/requestDedup.ts");
|
||||
|
||||
const settings = await getSettings();
|
||||
const circuitBreakers = getAllCircuitBreakerStatuses();
|
||||
const rateLimitStatus = getAllRateLimitStatus();
|
||||
const lockouts = getAllModelLockouts();
|
||||
const { getAllHealthStatuses } = await import("@/lib/localHealthCheck");
|
||||
|
||||
// System info
|
||||
const system = {
|
||||
@@ -46,8 +48,12 @@ export async function GET() {
|
||||
timestamp: new Date().toISOString(),
|
||||
system,
|
||||
providerHealth,
|
||||
localProviders: getAllHealthStatuses(),
|
||||
rateLimitStatus,
|
||||
lockouts,
|
||||
dedup: {
|
||||
inflightRequests: getInflightCount(),
|
||||
},
|
||||
setupComplete: settings?.setupComplete || false,
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import { timingSafeEqual } from "node:crypto";
|
||||
import { timingSafeEqual } from "crypto";
|
||||
import {
|
||||
getProvider,
|
||||
generateAuthData,
|
||||
|
||||