Compare commits

...

41 Commits

Author SHA1 Message Date
diegosouzapw 7cb420d8e6 feat(release): v2.0.8 — custom image model handler resolution
Build Electron Desktop App / Validate version (push) Failing after 26s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-07 10:05:20 -03:00
Diego Rodrigues de Sa e Souza d3919d441f Merge pull request #239 from diegosouzapw/fix/issue-238-image-handler
fix: pass resolved provider to image handler for custom models (#238)
2026-03-07 10:04:24 -03:00
diegosouzapw 4b5824babc fix: pass resolved provider to image handler for custom models (#238) 2026-03-07 10:03:48 -03:00
diegosouzapw fb87df14fd feat(release): v2.0.7 — custom image model routing + Codex OAuth workspace isolation
Build Electron Desktop App / Validate version (push) Failing after 34s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-07 06:58:07 -03:00
Diego Rodrigues de Sa e Souza da9e4e929b Merge pull request #237 from diegosouzapw/fix/issue-232-236-image-oauth
fix: custom image model routing + Codex OAuth workspace isolation (#232, #236)
2026-03-07 06:56:49 -03:00
diegosouzapw 10b23b15ae fix: custom image model routing + Codex OAuth workspace isolation (#232, #236) 2026-03-07 06:56:09 -03:00
diegosouzapw 30fba39b35 feat(release): v2.0.6 — custom model apiFormat routing fix
Build Electron Desktop App / Validate version (push) Failing after 33s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-07 01:36:21 -03:00
Diego Rodrigues de Sa e Souza 5a75ff67c9 Merge pull request #233 from diegosouzapw/fix/issue-204-apiformat-routing
fix: wire apiFormat from custom model DB into routing layer (#204)
2026-03-07 01:35:30 -03:00
diegosouzapw 358828b617 fix: wire apiFormat from custom model DB into routing layer (#204) 2026-03-07 01:26:59 -03:00
diegosouzapw e080c4a16a feat(release): v2.0.5 — fix Chat→Responses reasoning IDs, electron auto-update, dependency bumps
Build Electron Desktop App / Validate version (push) Failing after 31s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-06 18:51:24 -03:00
Diego Rodrigues de Sa e Souza 04b7e38baf Merge pull request #221 from benzntech/feat/electron-auto-update
feat(electron): add auto-update functionality with electron-updater
2026-03-06 18:49:54 -03:00
Diego Rodrigues de Sa e Souza 7ee23fbe19 Merge pull request #230 from diegosouzapw/dependabot/npm_and_yarn/express-rate-limit-8.3.0
deps: bump express-rate-limit from 8.2.1 to 8.3.0
2026-03-06 18:49:51 -03:00
Diego Rodrigues de Sa e Souza c49bdb4ebb Merge pull request #229 from diegosouzapw/dependabot/github_actions/docker/build-push-action-7
chore(deps): bump docker/build-push-action from 6 to 7
2026-03-06 18:49:48 -03:00
Diego Rodrigues de Sa e Souza 0f7efed8d5 Merge pull request #228 from diegosouzapw/dependabot/github_actions/actions/upload-artifact-7
chore(deps): bump actions/upload-artifact from 4 to 7
2026-03-06 18:49:46 -03:00
Diego Rodrigues de Sa e Souza d07bc6dcf3 Merge pull request #227 from diegosouzapw/dependabot/github_actions/docker/login-action-4
chore(deps): bump docker/login-action from 3 to 4
2026-03-06 18:49:43 -03:00
Diego Rodrigues de Sa e Souza d607d46fa3 Merge pull request #226 from diegosouzapw/dependabot/github_actions/actions/download-artifact-8
chore(deps): bump actions/download-artifact from 4 to 8
2026-03-06 18:49:40 -03:00
Diego Rodrigues de Sa e Souza 2225dd14aa Merge pull request #225 from diegosouzapw/dependabot/github_actions/actions/cache-5
chore(deps): bump actions/cache from 4 to 5
2026-03-06 18:49:37 -03:00
Diego Rodrigues de Sa e Souza f6c0e7bbbe Merge pull request #222 from benzntech/fix/electron-release-duplicate-asset
fix(ci): remove duplicate OmniRoute.exe entry in electron release workflow
2026-03-06 18:49:28 -03:00
Diego Rodrigues de Sa e Souza c4675c5219 Merge pull request #231 from diegosouzapw/fix/issue-224-reasoning-ids
fix: omit synthesized reasoning items in Chat→Responses translation (#224)
2026-03-06 18:49:25 -03:00
diegosouzapw 2d977a3c4d fix: omit synthesized reasoning items in Chat→Responses translation (#224) 2026-03-06 18:48:34 -03:00
dependabot[bot] 9405918258 deps: bump express-rate-limit from 8.2.1 to 8.3.0
Bumps [express-rate-limit](https://github.com/express-rate-limit/express-rate-limit) from 8.2.1 to 8.3.0.
- [Release notes](https://github.com/express-rate-limit/express-rate-limit/releases)
- [Commits](https://github.com/express-rate-limit/express-rate-limit/compare/v8.2.1...v8.3.0)

---
updated-dependencies:
- dependency-name: express-rate-limit
  dependency-version: 8.3.0
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-06 18:46:36 +00:00
dependabot[bot] a69d7dd4b5 chore(deps): bump docker/build-push-action from 6 to 7
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6 to 7.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v6...v7)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-version: '7'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-06 18:27:03 +00:00
dependabot[bot] 428e6cb53f chore(deps): bump actions/upload-artifact from 4 to 7
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4 to 7.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v4...v7)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: '7'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-06 18:26:59 +00:00
dependabot[bot] c9a2955d28 chore(deps): bump docker/login-action from 3 to 4
Bumps [docker/login-action](https://github.com/docker/login-action) from 3 to 4.
- [Release notes](https://github.com/docker/login-action/releases)
- [Commits](https://github.com/docker/login-action/compare/v3...v4)

---
updated-dependencies:
- dependency-name: docker/login-action
  dependency-version: '4'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-06 18:26:54 +00:00
dependabot[bot] 7aefcd3437 chore(deps): bump actions/download-artifact from 4 to 8
Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 4 to 8.
- [Release notes](https://github.com/actions/download-artifact/releases)
- [Commits](https://github.com/actions/download-artifact/compare/v4...v8)

---
updated-dependencies:
- dependency-name: actions/download-artifact
  dependency-version: '8'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-06 18:26:51 +00:00
dependabot[bot] 79f4f79c46 chore(deps): bump actions/cache from 4 to 5
Bumps [actions/cache](https://github.com/actions/cache) from 4 to 5.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-06 18:26:46 +00:00
benzntech c11c275678 fix(electron): address auto-updater review issues
- Remove unused dialog import
- Stop Next.js server before quitAndInstall() to prevent data loss
- Propagate errors from checkForUpdates/downloadUpdate to IPC handlers
  so renderer can distinguish success from failure
- Remove meaningless return value from install-update handler
2026-03-06 19:22:41 +05:30
benzntech bbcd1d3a08 fix(ci): remove duplicate OmniRoute.exe entry in electron release workflow
Duplicate release-assets/OmniRoute.exe glob caused softprops/action-gh-release
to attempt a second upload of the same asset, triggering a 404 Not Found error
on the GitHub release asset update API. The file is already covered by the
*.exe glob pattern above it.
2026-03-06 19:18:41 +05:30
benzntech 3342d5b931 feat(electron): add auto-update functionality with electron-updater 2026-03-06 18:54:00 +05:30
diegosouzapw f96ee44213 feat(release): v2.0.4 — round-robin lastUsedAt persistence, zod standalone build fix
Build Electron Desktop App / Validate version (push) Failing after 31s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-05 23:24:56 -03:00
Diego Rodrigues de Sa e Souza bc53fe0cd9 Merge pull request #219 from diegosouzapw/fix/issue-218-round-robin-lastUsedAt
fix: persist lastUsedAt for round-robin + zod in standalone build (#218, #217)
2026-03-05 23:24:13 -03:00
diegosouzapw 97a67b5d3e fix: persist lastUsedAt in provider_connections schema for round-robin (#218)
- Add last_used_at column to provider_connections CREATE TABLE schema
- Add ensureProviderConnectionsColumns migration for existing databases
- Add last_used_at to INSERT and UPDATE SQL in providers.ts
- Add last_used_at to JSON migration INSERT in core.ts
- Add zod to serverExternalPackages in next.config.mjs (#217)

Fixes #218: Round-robin routing strategy now correctly persists
the lastUsedAt timestamp, allowing rotation between accounts.

Fixes #217: zod module is now properly included in standalone/Docker
builds by declaring it as a server external package.
2026-03-05 23:22:10 -03:00
diegosouzapw 1ffa58be76 feat(release): v2.0.3 — deferred tools cache_control fix, quota system hardening
Build Electron Desktop App / Validate version (push) Failing after 29s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-05 21:57:04 -03:00
Diego Rodrigues de Sa e Souza a5cf51c0b9 Merge pull request #214 from DavyMassoneto/fix/claude-oauth-usage-endpoint
fix: harden quota system — code review fixes + build fix
2026-03-05 21:55:23 -03:00
Diego Rodrigues de Sa e Souza 3d38cbf70f Merge pull request #216 from DavyMassoneto/fix/defer-loading-cache-control-conflict
fix: skip cache_control on deferred tools + remove stale schemas.js
2026-03-05 21:55:14 -03:00
DavyMassoneto 196a4e037c fix: skip cache_control on deferred tools + remove stale schemas.js
- Skip tools with defer_loading=true when assigning cache_control
  (Anthropic API rejects the combination with 400)
- Delete stale schemas.js that shadowed the .ts source, causing
  missing cloudSyncActionSchema export

Fixes #215
2026-03-05 20:19:58 -03:00
diegosouzapw 6a0760a2c5 chore: bump to v2.0.2 (v2.0.1 was claimed on npm)
Build Electron Desktop App / Validate version (push) Failing after 40s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-05 20:11:15 -03:00
diegosouzapw b0819404c7 feat(release): v2.0.1 — endpoint-aware models, 3 bug fixes (#212, #213, #200), 3 features (#204, #205, #206)
Build Electron Desktop App / Validate version (push) Failing after 35s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
2026-03-05 20:04:41 -03:00
DavyMassoneto bfe495931f fix(claude): correct utilization semantics, harden quota cache, fix premature model unavailability
- Fix inverted Claude OAuth utilization (remaining, not used)
- Add hasUtilization() guard to prevent false exhaustion from empty responses
- Centralize anthropic-version into CLAUDE_CONFIG.apiVersion
- Add parseDate() for safe date validation in quota cache
- Batch background refresh with MAX_CONCURRENT_REFRESHES=5
- Move setModelUnavailable to after all accounts exhausted, not first 429
- Extract safePercentage() to shared utils (dedup)
- Use isRecord() type guard in usage API route
- Exclude binary files from Tailwind v4 source scanning
2026-03-05 19:39:59 -03:00
DavyMassoneto 11bcdd810a feat: quota-aware account selection + fix premature model unavailability
- Move setModelUnavailable from per-account loop to all-accounts-exhausted path
- Clear model unavailability on successful fallback
- Add in-memory quota cache with background refresh (5min active, 20min exhausted)
- Integrate quota cache in account selection to skip exhausted accounts
- Mark accounts as exhausted from 429 when no cached quota data exists
- Populate quota cache from dashboard usage endpoint
2026-03-05 18:49:56 -03:00
diegosouzapw 228ebf436e feat: endpoint-aware model management + fix 3 bugs (#212, #213, #200)
Bug Fixes:
- #212: Auto-generate API_KEY_SECRET at startup (like JWT_SECRET)
- #213: Circuit breaker now scoped per-model instead of per-provider
- #200: Connectivity fallback for custom providers (Ollama, LM Studio)

Features:
- #204: API Format selector (Chat Completions / Responses API) for custom models
- #205: Combo endpoint field (chat / embeddings / images) in schema
- #206: Supported Endpoints checkboxes (chat, embeddings, images, audio)
- Custom models with endpoint tags appear in /v1/embeddings and /v1/images/generations
- Model catalog includes api_format, type, and supported_endpoints metadata
- Provider detail page shows badges for non-default endpoint configurations

Files changed: instrumentation.ts, combo.ts, validation.ts, models.ts,
schemas.ts, provider-models/route.ts, providers/[id]/page.tsx,
catalog.ts, embeddings/route.ts, images/generations/route.ts
2026-03-05 18:49:07 -03:00
70 changed files with 1324 additions and 1212 deletions
+3 -3
View File
@@ -31,14 +31,14 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@v4
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v7
with:
context: .
target: runner-base
@@ -87,7 +87,7 @@ jobs:
merge-multiple: true
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@v4
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
+3 -4
View File
@@ -78,7 +78,7 @@ jobs:
cache: npm
- name: Cache node_modules
uses: actions/cache@v4
uses: actions/cache@v5
with:
path: node_modules
key: ${{ runner.os }}-node-${{ hashFiles('package-lock.json') }}
@@ -120,7 +120,7 @@ jobs:
fi
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v7
with:
name: electron-${{ matrix.platform }}
path: release-assets/
@@ -136,7 +136,7 @@ jobs:
fetch-depth: 0
- name: Download all artifacts
uses: actions/download-artifact@v4
uses: actions/download-artifact@v8
with:
path: release-assets
merge-multiple: true
@@ -172,6 +172,5 @@ jobs:
release-assets/*.blockmap
release-assets/*.source.tar.gz
release-assets/*.source.zip
release-assets/OmniRoute.exe
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+5
View File
@@ -117,3 +117,8 @@ icon.iconset/
# VS Code Extension (independent Git repo)
vscode-extension/
# SQLite residual files
*.sqlite-shm
*.sqlite-wal
*.sqlite-journal
+202 -4
View File
@@ -7,6 +7,204 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
---
## [2.0.8] — 2026-03-07
> ### 🐛 Bug Fix — Custom Image Model Handler Resolution
### 🐛 Bug Fixes
- **#238 — Custom image models still fail in handler layer** — v2.0.7 fixed the route-layer validation, but the handler (`handleImageGeneration()`) called `parseImageModel()` again internally, rejecting custom models a second time. Fix: handler now accepts an optional `resolvedProvider` parameter; when provided, it skips re-validation and routes custom models to the OpenAI-compatible handler with a synthetic config. PR #239
### 📁 Files Changed
| File | Change |
| -------------------------------------------- | -------------------------------------------------------------------------------- |
| `open-sse/handlers/imageGeneration.ts` | Added `resolvedProvider` param + custom model fallback |
| `src/app/api/v1/images/generations/route.ts` | Tracks `isCustomModel`, passes `resolvedProvider`, credentials for custom models |
---
## [2.0.7] — 2026-03-07
> ### 🐛 Bug Fixes — Custom Image Models + Codex OAuth Workspace Isolation
### 🐛 Bug Fixes
- **#232 — Custom Gemini image models fail on `/v1/images/generations`** — Custom models tagged with `supportedEndpoints: ["images"]` appeared in the model listing (GET) but were rejected by the POST handler. `parseImageModel()` only checked the built-in `IMAGE_PROVIDERS` registry. Fix: added a custom model DB fallback for models with the `images` endpoint tag. PR #237
- **#236 — Codex OAuth overwrites existing connection when same email added to another workspace** — The OAuth callback route had 3 upsert blocks matching connections by email-only, bypassing the workspace-aware logic in `createProviderConnection()`. When the same email authenticated to a new workspace, the existing connection's `workspaceId` was silently overwritten. Fix: for Codex, the match now also checks `providerSpecificData.workspaceId`, allowing separate connections per workspace. PR #237
### 📁 Files Changed
| File | Change |
| ------------------------------------------------ | ---------------------------------------------------- |
| `src/app/api/v1/images/generations/route.ts` | Custom model DB fallback in POST handler |
| `src/app/api/oauth/[provider]/[action]/route.ts` | Workspace-aware Codex matching in 3 upsert locations |
### ⏭️ Issues Triaged
- **#234** — Playground feature request — Acknowledged, added to roadmap
- **#235** — ACP support feature request — Acknowledged, added to roadmap
---
## [2.0.6] — 2026-03-07
> ### 🐛 Bug Fix — Custom Model API Format Routing
### 🐛 Bug Fixes
- **#204 — Custom model `apiFormat` not used in routing** — Custom models configured with `apiFormat: "responses"` in the dashboard were still being routed through the Chat Completions translator. The `apiFormat` field was stored in the DB and displayed in the UI, but never consumed by the routing layer. Fix: `getModelInfo()` now returns `apiFormat` from the custom model DB, and both `resolveModelOrError()` functions override `targetFormat` to `openai-responses` when set. PR #233
### ✅ Issues Closed
- **#205** — Combo endpoint support — Already implemented in v2.0.2
- **#206** — Manual model→endpoint mapping — Already implemented in v2.0.2
- **#223** — CLI fingerprint parity — Responded with 4-phase roadmap
### 📁 Files Changed
| File | Change |
| --------------------------------- | ---------------------------------------------------------------------- |
| `src/sse/services/model.ts` | Added `lookupCustomModelApiFormat()`, enriched `getModelInfo()` return |
| `src/sse/handlers/chat.ts` | Override `targetFormat` when `apiFormat === "responses"` |
| `src/sse/handlers/chatHelpers.ts` | Same override in duplicate `resolveModelOrError()` |
---
## [2.0.5] — 2026-03-06
> ### 🐛 Bug Fix, Electron Auto-Update & Dependency Bumps
### 🐛 Bug Fixes
- **#224 — Chat→Responses translation creates invalid reasoning IDs** — Removed synthetic reasoning item generation in `openaiToOpenAIResponsesRequest()`. The translator was creating reasoning items with IDs like `reasoning_15`, but OpenAI's Responses API requires server-generated `rs_*` IDs, causing `400 Invalid Request` errors from Responses-compatible upstreams. Fix: omit reasoning items entirely during translation
- **CI: duplicate OmniRoute.exe in release workflow** — Removed redundant explicit `release-assets/OmniRoute.exe` entry that caused `softprops/action-gh-release` to fail with 404 on duplicate upload. PR #222 by @benzntech
### ✨ New Features
- **Electron Auto-Update** — Added auto-update functionality to the desktop app using `electron-updater`. Includes IPC handlers for check/download/install, "Check for Updates" in system tray menu, desktop notification when update is ready, and silent startup check (3s delay). PR #221 by @benzntech
### 📦 Dependencies
- Bump `actions/cache` from 4 to 5 (#225)
- Bump `actions/download-artifact` from 4 to 8 (#226)
- Bump `docker/login-action` from 3 to 4 (#227)
- Bump `actions/upload-artifact` from 4 to 7 (#228)
- Bump `docker/build-push-action` from 6 to 7 (#229)
- Bump `express-rate-limit` from 8.2.1 to 8.3.0 (#230)
### 📁 Files Changed
| File | Change |
| ------------------------------------------------- | ---------------------------------------------------- |
| `open-sse/translator/request/openai-responses.ts` | Remove synthetic reasoning item generation |
| `.github/workflows/electron-release.yml` | Remove duplicate exe entry, bump GH Actions |
| `.github/workflows/docker-publish.yml` | Bump docker/login-action and build-push-action |
| `electron/main.js` | Auto-updater setup, IPC handlers, tray menu |
| `electron/package.json` | Added electron-updater dep and GitHub publish config |
| `electron/preload.js` | Exposed update APIs via contextBridge |
| `package-lock.json` | Updated express-rate-limit |
---
## [2.0.4] — 2026-03-06
> ### 🐛 Bug Fixes — Round-Robin Persistence & Docker Compatibility
### 🐛 Bug Fixes
- **#218 — Round-robin sticks to one account** — Added `last_used_at` column to `provider_connections` schema. Round-robin routing relied on `lastUsedAt` to rotate between accounts, but the column was missing from the database — the value was always `null`, causing selection to fall back to the same account. Includes auto-migration for existing databases
- **#217 — `Cannot find module 'zod'` in Docker/standalone builds** — Added `zod` to `serverExternalPackages` in `next.config.mjs`. Next.js standalone builds weren't tracing `zod` through dynamic imports, causing crashes on Docker startup. Data is **not lost** — the crash prevented the server from reading the existing database
### 📁 Files Changed
| File | Change |
| ------------------------- | ------------------------------------------------------ |
| `src/lib/db/core.ts` | Schema + migration + JSON migration for `last_used_at` |
| `src/lib/db/providers.ts` | INSERT + UPDATE SQL for `last_used_at` |
| `next.config.mjs` | `serverExternalPackages: ['better-sqlite3', 'zod']` |
---
## [2.0.3] — 2026-03-05
> ### 🐛 Bug Fixes & Quota System Hardening
### 🐛 Bug Fixes
- **#215 — Deferred tools 400 error** — Skip `cache_control` on tools with `defer_loading=true` when assigning prompt caching to the last tool. Previously, the API rejected requests with 400 when MCP tools (Playwright, etc.) had deferred loading enabled. Fix applied in both `claudeHelper.ts` and `openai-to-claude.ts` translation layers. PR #216 by @DavyMassoneto
- **Stale compiled schemas.js** — Deleted stale compiled `schemas.js` (912 lines) that shadowed the TypeScript `.ts` source, causing `cloudSyncActionSchema` warnings in the dashboard. PR #216 by @DavyMassoneto
- **#202 — False quota exhaustion** — Fixed empty API responses (`{}`) creating quota objects with `utilization ?? 0` = 0% remaining, incorrectly marking accounts as exhausted. Added `hasUtilization()` guard. PR #214 by @DavyMassoneto
- **Invalid date crash** — `parseDate()` now validates dates before comparison, handling `Invalid Date` from malformed `resetAt` values gracefully. PR #214 by @DavyMassoneto
- **`total=0` false infinite quota** — `normalizeQuotas` now defaults to 0% remaining when `total` is zero (was incorrectly reporting 100%). PR #214 by @DavyMassoneto
- **Tailwind v4 build failure** — Tailwind v4 scanned `*.sqlite-shm`/`*.sqlite-wal` binary files, triggering "Invalid code point" errors. Added `@source not` exclusions in `globals.css`. PR #214 by @DavyMassoneto
### ✨ Improvements
- **Quota-aware account selection** — All load-balancing strategies (sticky, round-robin, p2c, random, least-used, cost-optimized, fill-first) now prioritize accounts with available quota over exhausted ones. PR #214 by @DavyMassoneto
- **Concurrent refresh protection** — `tickRunning` flag prevents overlapping background quota refresh ticks; `refreshingSet` deduplicates per-connection refreshes. Thundering herd prevention with `MAX_CONCURRENT_REFRESHES=5`. PR #214 by @DavyMassoneto
- **`clearModelUnavailability` on success** — Model unavailability is now cleared on every successful request, not only on fallback paths. PR #214 by @DavyMassoneto
- **Centralized `anthropic-version`** — Hardcoded `anthropic-version` header (3 occurrences) centralized into `CLAUDE_CONFIG.apiVersion`. PR #214 by @DavyMassoneto
- **Extracted `safePercentage()` utility** — Shared percentage validation function extracted to `src/shared/utils/formatting.ts`, eliminating duplication between backend and frontend. PR #214 by @DavyMassoneto
- **`isRecord()` type guard** — Replaces inline `typeof` chain in usage API route. PR #214 by @DavyMassoneto
### 📁 Files Changed
| File | Change |
| ------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
| `open-sse/translator/helpers/claudeHelper.ts` | Skip `cache_control` on deferred tools |
| `open-sse/translator/request/openai-to-claude.ts` | Same fix in translator layer |
| `src/shared/validation/schemas.js` | **DELETED** — stale compiled JS |
| `.gitignore` | Exclude Tailwind binary scanning |
| `open-sse/services/usage.ts` | Legacy endpoint fallback logging |
| `src/domain/quotaCache.ts` | **NEW** — Core quota cache with hardening |
| `src/shared/utils/formatting.ts` | **NEW** — `safePercentage()` utility |
| `src/instrumentation.ts` | Startup log for quota cache |
| `src/sse/handlers/chat.ts` | `clearModelUnavailability` + `markAccountExhaustedFrom429` |
| `src/sse/services/auth.ts` | Quota-aware account selection |
| `src/app/globals.css` | Tailwind `@source not` exclusions |
| `src/app/api/usage/[connectionId]/route.ts` | `isRecord()` type guard |
| `src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx` | Use `remainingPercentage` directly |
| `src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx` | Use shared `safePercentage()` |
---
## [2.0.2] — 2026-03-05
> ### 🐛 Bug Fixes & ✨ Endpoint-Aware Model Management
### 🐛 Bug Fixes
- **#212 — API Key creation crash** — Auto-generate `API_KEY_SECRET` at startup (like `JWT_SECRET`) to prevent HMAC crashes
- **#213 — Circuit breaker scope** — Changed circuit breaker key from provider-level to model-level; a 429 on one account no longer blocks all accounts for the same provider
- **#200 — Custom provider connection check** — Added connectivity fallback for OpenAI-compatible providers (Ollama, LM Studio); if `/models` and `/chat/completions` fail, a simple HTTP ping to the base URL marks the provider as connected
### ✨ New Features
- **#204 — API Format selector** — Custom models can now specify `apiFormat`: `chat-completions` (default) or `responses` (for the Responses API)
- **#205 — Combo endpoint support** — Combos now accept an `endpoint` field in the schema (`chat` | `embeddings` | `images`), enabling fallback/rotation combos for non-chat endpoints
- **#206 — Supported Endpoints mapping** — When adding custom models, users can check which endpoints the model supports (💬 Chat, 📐 Embeddings, 🖼️ Images, 🔊 Audio). Models tagged for embeddings appear in `/v1/embeddings` and models tagged for images appear in `/v1/images/generations`
- **Visual badges** — Model rows now display colored badges for non-default API formats and endpoint types
- **Model catalog metadata** — `/v1/models` response includes `api_format`, `type`, and `supported_endpoints` for custom models
### 📁 Files Changed
| File | Change |
| ------------------------------------------------------- | ------------------------------------------------ |
| `src/instrumentation.ts` | Auto-generate `API_KEY_SECRET` |
| `open-sse/services/combo.ts` | Circuit breaker keyed per-model |
| `src/lib/providers/validation.ts` | Connectivity fallback ping |
| `src/lib/db/models.ts` | `apiFormat` + `supportedEndpoints` fields |
| `src/shared/schemas/validation.ts` | `endpoint` in `comboSchema` |
| `src/shared/validation/schemas.ts` | Extended `providerModelMutationSchema` |
| `src/app/api/provider-models/route.ts` | Pass new fields through API |
| `src/app/(dashboard)/dashboard/providers/[id]/page.tsx` | API format dropdown, endpoint checkboxes, badges |
| `src/app/api/v1/models/catalog.ts` | Custom model metadata enrichment |
| `src/app/api/v1/embeddings/route.ts` | Include custom embedding models |
| `src/app/api/v1/images/generations/route.ts` | Include custom image models |
---
## [2.0.0] — 2026-03-05
> ### 🚀 Major Release — MCP Multi-Transport, A2A Protocol, Auto-Combo Engine & Full Type Safety Overhaul
@@ -131,13 +329,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| `src/lib/a2a/skills/` | `smartRouting.ts`, `quotaManagement.ts` |
| `src/app/a2a/` | `route.ts` (JSON-RPC 2.0 dispatch handler) |
| `src/app/api/mcp/sse/` | `route.ts` (SSE transport endpoint) |
| `src/app/api/mcp/stream/` | `route.ts` (Streamable HTTP transport endpoint) |
| `src/app/api/mcp/stream/` | `route.ts` (Streamable HTTP transport endpoint) |
| `open-sse/services/autoCombo/` | `scoring.ts`, `taskFitness.ts`, `engine.ts`, `selfHealing.ts`, `modePacks.ts`, `persistence.ts`, `index.ts` |
| `src/shared/contracts/` | `quota.ts` (shared API contracts) |
| `src/shared/constants/` | `mcpScopes.ts` |
| `src/shared/validation/` | `settingsSchemas.ts` (extracted settings Zod schema) |
| `src/lib/db/migrations/` | `002_mcp_a2a_tables.sql` |
| `src/app/(dashboard)/` | `dashboard/mcp/page.tsx`, `dashboard/a2a/page.tsx`, `dashboard/auto-combo/page.tsx`, `dashboard/endpoint/ApiEndpointsTab.tsx` |
| `src/app/(dashboard)/` | `dashboard/mcp/page.tsx`, `dashboard/a2a/page.tsx`, `dashboard/auto-combo/page.tsx`, `dashboard/endpoint/ApiEndpointsTab.tsx` |
| `vscode-extension/src/services/` | `mcpClient.ts`, `a2aClient.ts`, `policyEngine.ts`, `preflightDialog.ts`, `budgetGuard.ts`, `healthMonitor.ts`, `modePackSelector.ts`, `humanCheckpoint.ts` |
| `scripts/` | `check-cycles.mjs`, `check-docs-sync.mjs`, `check-route-validation.mjs`, `check-t11-any-budget.mjs`, `run-playwright-tests.mjs`, `runtime-env.mjs` |
| `tests/` | `t06-schema-hardening.test.mjs`, `t07-no-log-key-config.test.mjs`, `t08-mcp-scope-enforcement.test.mjs`, `ecosystem.test.ts` |
@@ -158,12 +356,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| `889e2ba` | 2026-03-04 | feat: add error pages, harden DB layer and compliance module |
| `cbd0b1c` | 2026-03-04 | refactor: harden open-sse services, eliminate any casts, add dashboard pages |
| `b33a853` | 2026-03-04 | feat: Introduce A2A lifecycle management, add type safety to ComfyUI and stream handling |
| `a1a2610` | 2026-03-04 | feat: v2.0.0 - MCP server, A2A agent, proxy improvements and docs update |
| `a1a2610` | 2026-03-04 | feat: v2.0.0 - MCP server, A2A agent, proxy improvements and docs update |
| `d615ca5` | 2026-03-05 | feat: configurable tool name prefix (#199) and custom rpm/tpm rate limits (#198) |
| `6d8868b` | 2026-03-05 | fix: extract validation helpers to fix webpack barrel-file resolution bug |
| `bc2e60c` | 2026-03-05 | feat: Introduce new A2A and MCP API routes, enhance dashboard UI, E2E tests |
| `79c23df` | 2026-03-05 | feat: Add i18n for media/themes, enhance combos with strategy guides, E2E tests |
| `2490ba5` | 2026-03-05 | feat: Introduce combo readiness checks and strategy recommendations |
| `2490ba5` | 2026-03-05 | feat: Introduce combo readiness checks and strategy recommendations |
| `48dda26` | 2026-03-05 | fix: CORS headers on early-return error responses + auto-combo validation (#208, #209) |
| `078a42b` | 2026-03-05 | feat: consolidate Endpoint, MCP, A2A into tabbed Endpoints page |
| `6f1e6a0` | 2026-03-05 | feat: add MCP/A2A enable/disable toggle switches on Endpoints page |
+4 -3
View File
@@ -287,6 +287,7 @@ Result: Never stop coding, minimal cost
- **مكافحة الرعد القطيع** — Mutex + حماية الإشارة ضد عواصف إعادة المحاولة المتزامنة
- **السلاسل الاحتياطية المجمعة** — إذا فشل الموفر الأساسي، فسيتم دخوله تلقائيًا عبر السلسلة دون أي تدخل
- **Combo Circuit Breaker** — التعطيل التلقائي لمقدمي الخدمات الفاشلين ضمن سلسلة التحرير والسرد
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **لوحة معلومات الصحة** — مراقبة وقت التشغيل، وحالات قاطع الدائرة، وعمليات التأمين، وإحصائيات ذاكرة التخزين المؤقت، ووقت الاستجابة p50/p95/p99
</details>
@@ -874,10 +875,10 @@ npm run electron:build:linux # Linux (.AppImage)
### 🤖 عمليات الوكيل والبروتوكول (الإصدار 2.0)

| ميزة | ماذا يفعل |
| ------------------------------------ | -------------------------------------------------------------------------------- |
| 🔧 **خادم MCP (16 أداة)** | تتحكم أدوات IDE/agent في التوجيه والصحة والمجموعات والحدود والعمليات — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🔧 **خادم MCP (16 أداة)** | تتحكم أدوات IDE/agent في التوجيه والصحة والمجموعات والحدود والعمليات — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🤝 **خادم A2A (JSON-RPC + SSE)** | تنفيذ المهام من وكيل إلى وكيل مع تدفقات المزامنة والتدفق |
| 🧭 **Consolidated Endpoints Page** | Tabbed management page with Endpoint Proxy, MCP, A2A, and API Endpoints tabs |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🧭 **Consolidated Endpoints Page** | Tabbed management page with Endpoint Proxy, MCP, A2A, and API Endpoints tabs |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🛰️ **نبضات وقت تشغيل MCP** | حالة العملية الحقيقية (معرف المنتج، وقت التشغيل، عمر نبضات القلب، النقل، وضع النطاق) |
| 📋 **مسار تدقيق MCP** | سجلات التدقيق القابلة للتصفية مع النجاح/الفشل والإسناد الرئيسي |
| 🔐 **تنفيذ نطاق MCP** | 9 أذونات نطاق تفصيلية للوصول إلى الأدوات الخاضعة للرقابة |
+4 -3
View File
@@ -287,6 +287,7 @@ OpenAI използва един формат, Claude (Anthropic) използв
- **Anti-Thundering Herd** — Mutex + семафорна защита срещу едновременни повторни бури
- **Combo Fallback Chains** — Ако основният доставчик се провали, автоматично преминава през веригата без намеса
- **Combo Circuit Breaker** — Автоматично деактивира неизправните доставчици в рамките на комбинирана верига
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Health Dashboard** — Мониторинг на времето на работа, състояния на прекъсвачи, блокировки, статистика на кеша, латентност на p50/p95/p99
</details>
@@ -875,10 +876,10 @@ OmniRoute v2.0 е създаден като операционна платфо
### 🤖 Операции на агент и протокол (v2.0)

| Характеристика | Какво прави |
| ------------------------------------ | -------------------------------------------------------------------------------- |
| 🔧 **MCP сървър (16 инструмента)** | Инструментите за IDE/агент контролират маршрутизиране, здраве, комбинации, ограничения и операции — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🔧 **MCP сървър (16 инструмента)** | Инструментите за IDE/агент контролират маршрутизиране, здраве, комбинации, ограничения и операции — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🤝 **A2A сървър (JSON-RPC + SSE)** | Изпълнение на задачи от агент към агент със синхронизиране и поточно предаване |
| 🧭 **Consolidated Endpoints Page** | Tabbed management page with Endpoint Proxy, MCP, A2A, and API Endpoints tabs |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🧭 **Consolidated Endpoints Page** | Tabbed management page with Endpoint Proxy, MCP, A2A, and API Endpoints tabs |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🛰️ **MCP Runtime Heartbeat** | Реално състояние на процеса (pid, време на работа, възраст на сърдечния ритъм, транспорт, режим на обхвата) |
| 📋 **MCP одитна пътека** | Филтрируеми журнали за одит с успех/неуспех и ключово приписване |
| 🔐 **Прилагане на обхват на MCP** | 9 подробни разрешения за обхват за контролиран достъп до инструменти |
+4 -2
View File
@@ -283,10 +283,12 @@ Når en AI-gateway eksponeres for netværket (LAN, VPS, Docker), kan enhver med
**Sådan løser OmniRoute det:**
- **Circuit Breaker pr. udbyder** — Automatisk åbning/lukning med konfigurerbare tærskler og nedkøling (lukket/åbent/halvt åbent)
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Eksponentiel backoff** — Progressive forsinkelser af genforsøg
- **Anti-tordenbesætning** — Mutex + semaforbeskyttelse mod samtidige genforsøgsstorme
- **Combo Fallback Chains** — Hvis den primære udbyder fejler, falder den automatisk gennem kæden uden indgriben
- **Combo Circuit Breaker** - Deaktiverer automatisk fejlende udbydere i en kombinationskæde
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Health Dashboard** — Oppetidsovervågning, strømafbrydertilstande, lockouts, cachestatistik, p50/p95/p99 latency
</details>
@@ -875,10 +877,10 @@ OmniRoute v2.0 er bygget som en operationel platform, ikke kun en relæ-proxy.
### 🤖 Agent- og protokoloperationer (v2.0)

| Funktion | Hvad det gør |
| ------------------------------------ | ---------------------------------------------------------------------------------- |
| 🔧 **MCP-server (16 værktøjer)** | IDE/agent-værktøjer kontrollerer routing, sundhed, kombinationer, grænser og operationer — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🔧 **MCP-server (16 værktøjer)** | IDE/agent-værktøjer kontrollerer routing, sundhed, kombinationer, grænser og operationer — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🤝 **A2A-server (JSON-RPC + SSE)** | Agent-til-agent opgaveudførelse med synkronisering og streaming flows |
| 🧭 **Consolidated Endpoints Page** | Dedikerede administrationssider (`/dashboard/mcp`, `/dashboard/a2a`) |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🛰️ **MCP Runtime Heartbeat** | Reel processtatus (pid, oppetid, hjerteslagsalder, transport, omfangstilstand) |
| 📋 **MCP Audit Trail** | Filtrerbare revisionslogfiler med succes/fejl og nøgletilskrivning |
| 🔐 **MCP Scope Enforcement** | 9 granulære omfangstilladelser til kontrolleret værktøjsadgang |
+3 -2
View File
@@ -287,6 +287,7 @@ Wenn ein KI-Gateway dem Netzwerk (LAN, VPS, Docker) zugänglich gemacht wird, ka
- **Anti-Thundering Herd** Mutex + Semaphor-Schutz gegen gleichzeitige Wiederholungsstürme
- **Combo-Fallback-Ketten** Wenn der primäre Anbieter ausfällt, fällt er automatisch durch die Kette, ohne dass ein Eingreifen erforderlich ist
- **Combo Circuit Breaker** Deaktiviert automatisch ausgefallene Anbieter innerhalb einer Combo-Kette
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Gesundheits-Dashboard** Betriebszeitüberwachung, Leistungsschalterzustände, Sperren, Cache-Statistiken, p50/p95/p99-Latenz
</details>
@@ -881,10 +882,10 @@ OmniRoute v2.0 ist als Betriebsplattform konzipiert und nicht nur als Relay-Prox
### 🤖 Agenten- und Protokolloperationen (v2.0)

| Funktion | Was es tut |
| ------------------------------------ | -------------------------------------------------------------------------------- |
| 🔧 **MCP-Server (16 Tools)** | IDE-/Agent-Tools steuern Routing, Zustand, Kombinationen, Grenzwerte und Vorgänge — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🔧 **MCP-Server (16 Tools)** | IDE-/Agent-Tools steuern Routing, Zustand, Kombinationen, Grenzwerte und Vorgänge — IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🤝 **A2A-Server (JSON-RPC + SSE)** | Ausführung von Agent-zu-Agent-Aufgaben mit Synchronisierungs- und Streaming-Flows |
| 🧭 **Consolidated Endpoints Page** | Dedizierte Verwaltungsseiten (`/dashboard/mcp`, `/dashboard/a2a`) |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🛰️ **MCP Runtime Heartbeat** | Echter Prozessstatus (PID, Betriebszeit, Heartbeat-Alter, Transport, Scope-Modus) |
| 📋 **MCP Audit Trail** | Filterbare Audit-Protokolle mit Erfolg/Misserfolg und Schlüsselzuordnung |
| 🔐 **Durchsetzung des MCP-Geltungsbereichs** | 9 granulare Umfangsberechtigungen für kontrollierten Werkzeugzugriff |
+1
View File
@@ -868,6 +868,7 @@ npm run electron:build:linux # Linux (.AppImage)
| Característica | Qué Hace |
| ---------------------------------- | ---------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Auto-apertura/cierre por proveedor con umbrales configurables |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-Thundering Herd** | Mutex + semáforo rate-limit para proveedores con API key |
| 🧠 **Caché Semántico** | Caché de dos niveles (firma + semántico) reduce costo y latencia |
| ⚡ **Idempotencia de Solicitud** | Ventana de dedup de 5s para solicitudes duplicadas |
+1
View File
@@ -294,6 +294,7 @@ Tekoälypalveluntarjoajat voivat muuttua epävakaiksi, palauttaa 5xx-virheitä t
- **Anti-Thundering Herd** — Mutex + semaforisuoja samanaikaisia myrskyjä vastaan
- **Yhdistelmävaraketjut** Jos ensisijainen toimittaja epäonnistuu, putoaa automaattisesti ketjun läpi ilman väliintuloa
- **Combo Circuit Breaker** — Poistaa automaattisesti käytöstä vialliset palveluntarjoajat yhdistelmäketjussa
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Health Dashboard** — käytettävyyden valvonta, katkaisijoiden tilat, lukitukset, välimuistitilastot, p50/p95/p99-viive
</details>
+1
View File
@@ -866,6 +866,7 @@ npm run electron:build:linux # Linux (.AppImage)
| Fonctionnalité | Ce qu'elle fait |
| ------------------------------- | ---------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Ouverture/fermeture auto par fournisseur avec seuils configurables |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-Thundering Herd** | Mutex + sémaphore de rate-limit pour les fournisseurs avec clé API |
| 🧠 **Cache sémantique** | Cache à deux niveaux (signature + sémantique) réduit coût et latence |
| ⚡ **Idempotence des requêtes** | Fenêtre de dédup 5s pour les requêtes dupliquées |
+9 -8
View File
@@ -1495,14 +1495,15 @@ Se não quiser criar credenciais próprias agora, ainda é possível usar o flux
ל-OmniRoute יש **210+ תכונות מתוכננות** לאורך שלבי פיתוח מרובים. להלן תחומי המפתח:
| קטגוריה | תכונות מתוכננות | הבהרה |
| --------------------- | --------------- | ---------------------------------------------------------------------------------------- |
| 🧠 **ניתוב ומודיעין** | 25+ | ניתוב עם זמן האחזור הנמוך ביותר, ניתוב מבוסס תגים, בדיקה מוקדמת של מכסה, בחירת חשבון P2C |
| 🔒 **אבטחה ותאימות** | 20+ | הקשחת SSRF, הסוואה של אישורים, הגבלת קצב לכל נקודת קצה, היקף מפתח ניהול |
| 📊 **צפיות** | 15+ | אינטגרציה של OpenTelemetry, ניטור מכסות בזמן אמת, מעקב עלויות לכל דגם |
| 🔄 **שילובי ספקים** | 20+ | רישום מודלים דינמיים, צינון ספקים, Codex מרובה חשבונות, ניתוח מכסת Copilot |
| **ביצועים** | 15+ | שכבת מטמון כפולה, מטמון הנחיה, מטמון תגובה, סטרימינג Keepalive, API אצווה |
| 🌐 **מערכת אקולוגית** | 10+ | WebSocket API, טעינה חוזרת של תצורה, חנות תצורה מבוזרת, מצב מסחרי |
| קטגוריה | תכונות מתוכננות | הבהרה |
| ---------------------------- | ------------------------------------------------------ | ---------------------------------------------------------------------------------------- |
| 🎯 **Endpoint-Aware Models** | — | Custom models declare supported endpoints + API format |
| 🧠 **ניתוב ומודיעין** | 25+ | ניתוב עם זמן האחזור הנמוך ביותר, ניתוב מבוסס תגים, בדיקה מוקדמת של מכסה, בחירת חשבון P2C |
| 🔒 **אבטחה ותאימות** | 20+ | הקשחת SSRF, הסוואה של אישורים, הגבלת קצב לכל נקודת קצה, היקף מפתח ניהול |
| 📊 **צפיות** | 15+ | אינטגרציה של OpenTelemetry, ניטור מכסות בזמן אמת, מעקב עלויות לכל דגם |
| 🔄 **שילובי ספקים** | 20+ | רישום מודלים דינמיים, צינון ספקים, Codex מרובה חשבונות, ניתוח מכסת Copilot |
| **ביצועים** | 15+ | שכבת מטמון כפולה, מטמון הנחיה, מטמון תגובה, סטרימינג Keepalive, API אצווה |
| 🌐 **מערכת אקולוגית** | 10+ | WebSocket API, טעינה חוזרת של תצורה, חנות תצורה מבוזרת, מצב מסחרי |
### 🔜 בקרוב
+1
View File
@@ -294,6 +294,7 @@ Az AI-szolgáltatók instabillá válhatnak, 5xx-es hibákat adnak vissza, vagy
- **Mennydörgés elleni csorda** - Mutex + szemafor védelem az egyidejű újrapróbálkozási viharok ellen
- **Kombinált tartalék láncok** Ha az elsődleges szolgáltató meghibásodik, automatikusan, beavatkozás nélkül átesik a láncon
- **Combo Circuit Breaker** Automatikusan letiltja a hibás szolgáltatókat a kombinált láncon belül
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Egészségügyi irányítópult** — Üzemidő-figyelés, áramkör-megszakító állapotok, zárolások, gyorsítótár-statisztika, p50/p95/p99 késleltetés
</details>
+1
View File
@@ -294,6 +294,7 @@ Penyedia AI bisa menjadi tidak stabil, menampilkan kesalahan 5xx, atau mencapai
- **Kawanan Anti-Guntur** — Perlindungan mutex + semaphore terhadap badai percobaan ulang secara bersamaan
- **Combo Fallback Chains** — Jika penyedia utama gagal, otomatis gagal dalam rantai tanpa intervensi
- **Combo Circuit Breaker** — Menonaktifkan secara otomatis penyedia yang gagal dalam rantai kombo
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Dasbor Kesehatan** — Pemantauan waktu aktif, status pemutus sirkuit, penguncian, statistik cache, latensi p50/p95/p99
</details>
+9 -8
View File
@@ -1138,14 +1138,15 @@ Cost: $0 forever!
ओम्निरूट ने कई विकास चरणों में **210+ सुविधाओं की योजना बनाई है**। यहां प्रमुख क्षेत्र हैं:
| श्रेणी | नियोजित विशेषताएं | हाइलाइट्स |
| --------------------------- | ----------------- | ------------------------------------------------------------------------------------------- |
| 🧠 **रूटिंग और इंटेलिजेंस** | 25+ | न्यूनतम-विलंबता रूटिंग, टैग-आधारित रूटिंग, कोटा प्रीफ़्लाइट, पी2सी खाता चयन |
| 🔒 **सुरक्षा एवं अनुपालन** | 20+ | एसएसआरएफ हार्डनिंग, क्रेडेंशियल क्लोकिंग, प्रति समापन बिंदु दर-सीमा, प्रबंधन कुंजी स्कोपिंग |
| 📊 **अवलोकनशीलता** | 15+ | ओपन टेलीमेट्री एकीकरण, वास्तविक समय कोटा निगरानी, ​​प्रति मॉडल लागत ट्रैकिंग |
| 🔄 **प्रदाता एकीकरण** | 20+ | डायनेमिक मॉडल रजिस्ट्री, प्रदाता कूलडाउन, मल्टी-अकाउंट कोडेक्स, कोपायलट कोटा पार्सिंग |
| **प्रदर्शन** | 15+ | दोहरी कैश परत, शीघ्र कैश, प्रतिक्रिया कैश, स्ट्रीमिंग कीपलाइव, बैच एपीआई |
| 🌐 **पारिस्थितिकी तंत्र** | 10+ | वेबसॉकेट एपीआई, कॉन्फिग हॉट-रीलोड, वितरित कॉन्फिग स्टोर, वाणिज्यिक मोड |
| श्रेणी | नियोजित विशेषताएं | हाइलाइट्स |
| ---------------------------- | ------------------------------------------------------ | ------------------------------------------------------------------------------------------- |
| 🎯 **Endpoint-Aware Models** | — | Custom models declare supported endpoints + API format |
| 🧠 **रूटिंग और इंटेलिजेंस** | 25+ | न्यूनतम-विलंबता रूटिंग, टैग-आधारित रूटिंग, कोटा प्रीफ़्लाइट, पी2सी खाता चयन |
| 🔒 **सुरक्षा एवं अनुपालन** | 20+ | एसएसआरएफ हार्डनिंग, क्रेडेंशियल क्लोकिंग, प्रति समापन बिंदु दर-सीमा, प्रबंधन कुंजी स्कोपिंग |
| 📊 **अवलोकनशीलता** | 15+ | ओपन टेलीमेट्री एकीकरण, वास्तविक समय कोटा निगरानी, ​​प्रति मॉडल लागत ट्रैकिंग |
| 🔄 **प्रदाता एकीकरण** | 20+ | डायनेमिक मॉडल रजिस्ट्री, प्रदाता कूलडाउन, मल्टी-अकाउंट कोडेक्स, कोपायलट कोटा पार्सिंग |
| **प्रदर्शन** | 15+ | दोहरी कैश परत, शीघ्र कैश, प्रतिक्रिया कैश, स्ट्रीमिंग कीपलाइव, बैच एपीआई |
| 🌐 **पारिस्थितिकी तंत्र** | 10+ | वेबसॉकेट एपीआई, कॉन्फिग हॉट-रीलोड, वितरित कॉन्फिग स्टोर, वाणिज्यिक मोड |
### 🔜 जल्द आ रहा है
+1
View File
@@ -865,6 +865,7 @@ npm run electron:build:linux # Linux (.AppImage)
| Funzionalità | Cosa Fa |
| ------------------------------- | ---------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Apertura/chiusura auto per provider con soglie configurabili |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-Thundering Herd** | Mutex + semaforo rate-limit per provider con API key |
| 🧠 **Cache semantica** | Cache a due livelli (firma + semantica) riduce costi e latenza |
| ⚡ **Idempotenza richieste** | Finestra dedup 5s per richieste duplicate |
+3
View File
@@ -532,6 +532,7 @@ AIは単なるチャット補完ではありません。開発者は、画像の
- MCP ダッシュボードからコンボのアクティブ化を直接切り替えます
- 事前定義されたポリシーパックから復元プロファイルを適用
- 同じ操作パネルからサーキットブレーカーの状態をリセット
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
</details>
@@ -864,6 +865,7 @@ npm run electron:build:linux # Linux (.AppImage)
| 特集 | 何をするのか |
| -------------------------------- | ---------------------------------------------------------------------------- |
| 🔌 **サーキットブレーカー** | 設定可能なしきい値によるプロバイダーごとの自動開閉 |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **対雷鳴の群れ** | API キープロバイダーのミューテックス + セマフォのレート制限 |
| 🧠 **セマンティック キャッシュ** | 2 層キャッシュ (シグネチャ + セマンティック) によりコストと遅延が削減 |
| ⚡ **冪等性のリクエスト** | 重複リクエストに対する 5 秒の重複除去ウィンドウ |
@@ -964,6 +966,7 @@ Combo: "my-coding-stack"
- システムステータス (稼働時間、バージョン、メモリ使用量)
- プロバイダーごとのサーキットブレーカーの状態 (クローズ/オープン/ハーフオープン)
- 🎯 **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- レート制限ステータスとアクティブなロックアウト
- 署名キャッシュ統計
- レイテンシ テレメトリ (p50/p95/p99) + プロンプト キャッシュ
+9 -8
View File
@@ -1495,14 +1495,15 @@ Se não quiser criar credenciais próprias agora, ainda é possível usar o flux
OmniRoute에는 여러 개발 단계에 걸쳐 **210개 이상의 기능이 계획되어 있습니다**. 주요 영역은 다음과 같습니다.
| 카테고리 | 계획된 기능 | 하이라이트 |
| --------------------------- | ----------- | ------------------------------------------------------------------------------ |
| 🧠 **라우팅 및 인텔리전스** | 25+ | 최저 대기 시간 라우팅, 태그 기반 라우팅, 실행 전 할당량, P2C 계정 선택 |
| 🔒 **보안 및 규정 준수** | 20+ | SSRF 강화, 자격 증명 클로킹, 엔드포인트당 속도 제한, 관리 키 범위 지정 |
| 📊 **관측성** | 15+ | OpenTelemetry 통합, 실시간 할당량 모니터링, 모델별 비용 추적 |
| 🔄 **공급자 통합** | 20+ | 동적 모델 레지스트리, 공급자 쿨다운, 다중 계정 Codex, Copilot 할당량 구문 분석 |
| **성능** | 15+ | 듀얼 캐시 레이어, 프롬프트 캐시, 응답 캐시, 스트리밍 Keepalive, 배치 API |
| 🌐 **생태계** | 10+ | WebSocket API, 구성 핫 리로드, 분산 구성 저장소, 상용 모드 |
| 카테고리 | 계획된 기능 | 하이라이트 |
| ---------------------------- | ------------------------------------------------------ | ------------------------------------------------------------------------------ |
| 🎯 **Endpoint-Aware Models** | — | Custom models declare supported endpoints + API format |
| 🧠 **라우팅 및 인텔리전스** | 25+ | 최저 대기 시간 라우팅, 태그 기반 라우팅, 실행 전 할당량, P2C 계정 선택 |
| 🔒 **보안 및 규정 준수** | 20+ | SSRF 강화, 자격 증명 클로킹, 엔드포인트당 속도 제한, 관리 키 범위 지정 |
| 📊 **관측성** | 15+ | OpenTelemetry 통합, 실시간 할당량 모니터링, 모델별 비용 추적 |
| 🔄 **공급자 통합** | 20+ | 동적 모델 레지스트리, 공급자 쿨다운, 다중 계정 Codex, Copilot 할당량 구문 분석 |
| **성능** | 15+ | 듀얼 캐시 레이어, 프롬프트 캐시, 응답 캐시, 스트리밍 Keepalive, 배치 API |
| 🌐 **생태계** | 10+ | WebSocket API, 구성 핫 리로드, 분산 구성 저장소, 상용 모드 |
### 🔜 출시 예정
+17 -16
View File
@@ -47,7 +47,7 @@ _Your universal API proxy — one endpoint, 36+ providers, zero downtime. Now wi
| **Settings** | ![Settings](docs/screenshots/06-settings.png) |
| **CLI Tools** | ![CLI Tools](docs/screenshots/07-cli-tools.png) |
| **Usage Logs** | ![Usage](docs/screenshots/08-usage.png) |
| **Endpoints** | ![Endpoints](docs/screenshots/09-endpoint.png) |
| **Endpoints** | ![Endpoints](docs/screenshots/09-endpoint.png) |
</details>
@@ -289,7 +289,7 @@ AI providers can become unstable, return 5xx errors, or hit temporary rate limit
**How OmniRoute solves it:**
- **Circuit Breaker per-provider** — Auto-open/close with configurable thresholds and cooldown (Closed/Open/Half-Open)
- **Circuit Breaker per-model** — Auto-open/close with configurable thresholds and cooldown (Closed/Open/Half-Open), scoped per-model to avoid cascading blocks
- **Exponential Backoff** — Progressive retry delays
- **Anti-Thundering Herd** — Mutex + semaphore protection against concurrent retry storms
- **Combo Fallback Chains** — If the primary provider fails, automatically falls through the chain with no intervention
@@ -890,19 +890,19 @@ OmniRoute v2.0 is built as an operational platform, not just a relay proxy.
### 🤖 Agent & Protocol Operations (v2.0)
| Feature | What It Does |
| ------------------------------------ | -------------------------------------------------------------------------------- |
| 🔧 **MCP Server (16 tools)** | IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🤝 **A2A Server (JSON-RPC + SSE)** | Agent-to-agent task execution with sync and streaming flows |
| 🧭 **Consolidated Endpoints Page** | Tabbed management page with Endpoint Proxy, MCP, A2A, and API Endpoints tabs |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🛰️ **MCP Runtime Heartbeat** | Real process status (pid, uptime, heartbeat age, transport, scope mode) |
| 📋 **MCP Audit Trail** | Filterable audit logs with success/failure and key attribution |
| 🔐 **MCP Scope Enforcement** | 9 granular scope permissions for controlled tool access |
| 📡 **A2A Task Lifecycle Management** | List/filter tasks, inspect events/artifacts, cancel running tasks |
| 📋 **Agent Card Discovery** | `/.well-known/agent.json` for client auto-discovery |
| 🧪 **Protocol E2E Test Harness** | Real MCP SDK + A2A client flows in `test:protocols:e2e` |
| ⚙️ **Operational Controls** | Switch combo, apply resilience profiles, reset breakers from one control surface |
| Feature | What It Does |
| ------------------------------------- | -------------------------------------------------------------------------------------------------- |
| 🔧 **MCP Server (16 tools)** | IDE/agent tools via 3 transports: stdio, SSE (`/api/mcp/sse`), Streamable HTTP (`/api/mcp/stream`) |
| 🤝 **A2A Server (JSON-RPC + SSE)** | Agent-to-agent task execution with sync and streaming flows |
| 🧭 **Consolidated Endpoints Page** | Tabbed management page with Endpoint Proxy, MCP, A2A, and API Endpoints tabs |
| 🎚️ **Service Enable/Disable Toggles** | ON/OFF switches for MCP and A2A with settings persistence (default: OFF) |
| 🛰️ **MCP Runtime Heartbeat** | Real process status (pid, uptime, heartbeat age, transport, scope mode) |
| 📋 **MCP Audit Trail** | Filterable audit logs with success/failure and key attribution |
| 🔐 **MCP Scope Enforcement** | 9 granular scope permissions for controlled tool access |
| 📡 **A2A Task Lifecycle Management** | List/filter tasks, inspect events/artifacts, cancel running tasks |
| 📋 **Agent Card Discovery** | `/.well-known/agent.json` for client auto-discovery |
| 🧪 **Protocol E2E Test Harness** | Real MCP SDK + A2A client flows in `test:protocols:e2e` |
| ⚙️ **Operational Controls** | Switch combo, apply resilience profiles, reset breakers from one control surface |
### 🧠 Routing & Intelligence
@@ -938,7 +938,8 @@ OmniRoute v2.0 is built as an operational platform, not just a relay proxy.
| Feature | What It Does |
| ----------------------------------- | ---------------------------------------------------------- |
| 🔌 **Circuit Breakers** | Provider-level trip/recover with threshold controls |
| 🔌 **Circuit Breakers** | Per-model trip/recover with threshold controls |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-Thundering Herd** | Mutex + semaphore protections on retry/rate events |
| 🧠 **Semantic + Signature Cache** | Cost/latency reduction with two cache layers |
| ⚡ **Request Idempotency** | Duplicate protection window |
+9 -8
View File
@@ -1495,14 +1495,15 @@ Jika anda ingin mendapatkan credenciais próprias agora, ada kemungkinan penggun
OmniRoute mempunyai **210+ ciri yang dirancang** merentas berbilang fasa pembangunan. Berikut adalah bidang utama:
| Kategori | Ciri Terancang | Sorotan |
| ------------------------------ | -------------- | ---------------------------------------------------------------------------------------------------- |
| 🧠 **Penghalaan & Perisikan** | 25+ | Penghalaan kependaman terendah, penghalaan berasaskan teg, kuota prapenerbangan, pemilihan akaun P2C |
| 🔒 **Keselamatan & Pematuhan** | 20+ | Pengerasan SSRF, penyelubungan kelayakan, had kadar setiap titik akhir, skop kunci pengurusan |
| 📊 **Kebolehlihatan** | 15+ | Penyepaduan OpenTelemetry, pemantauan kuota masa nyata, penjejakan kos setiap model |
| 🔄 **Integrasi Pembekal** | 20+ | Pendaftaran model dinamik, penyejukan pembekal, Codex berbilang akaun, penghuraian kuota Copilot |
| **Prestasi** | 15+ | Lapisan cache dwi, cache gesaan, cache respons, penstriman keepalive, API kelompok |
| 🌐 **Ekosistem** | 10+ | API WebSocket, konfigurasi hot-reload, kedai konfigurasi teragih, mod komersial |
| Kategori | Ciri Terancang | Sorotan |
| ------------------------------ | ------------------------------------------------------ | ---------------------------------------------------------------------------------------------------- |
| 🎯 **Endpoint-Aware Models** | — | Custom models declare supported endpoints + API format |
| 🧠 **Penghalaan & Perisikan** | 25+ | Penghalaan kependaman terendah, penghalaan berasaskan teg, kuota prapenerbangan, pemilihan akaun P2C |
| 🔒 **Keselamatan & Pematuhan** | 20+ | Pengerasan SSRF, penyelubungan kelayakan, had kadar setiap titik akhir, skop kunci pengurusan |
| 📊 **Kebolehlihatan** | 15+ | Penyepaduan OpenTelemetry, pemantauan kuota masa nyata, penjejakan kos setiap model |
| 🔄 **Integrasi Pembekal** | 20+ | Pendaftaran model dinamik, penyejukan pembekal, Codex berbilang akaun, penghuraian kuota Copilot |
| **Prestasi** | 15+ | Lapisan cache dwi, cache gesaan, cache respons, penstriman keepalive, API kelompok |
| 🌐 **Ekosistem** | 10+ | API WebSocket, konfigurasi hot-reload, kedai konfigurasi teragih, mod komersial |
### 🔜 Akan Datang
+9 -8
View File
@@ -1495,14 +1495,15 @@ Als u geen geloofwaardige geloofwaardigheid meer heeft, is het mogelijk om de st
OmniRoute heeft **210+ functies gepland** over meerdere ontwikkelingsfasen. Dit zijn de belangrijkste gebieden:
| Categorie | Geplande functies | Hoogtepunten |
| ------------------------------- | ----------------- | --------------------------------------------------------------------------------------------------------- |
| 🧠 **Routing en intelligentie** | 25+ | Routering met de laagste latentie, op tags gebaseerde routering, quota-preflight, P2C-accountselectie |
| 🔒 **Beveiliging en naleving** | 20+ | SSRF-verharding, cloaking van inloggegevens, snelheidslimiet per eindpunt, scoping van beheersleutels |
| 📊 **Waarneembaarheid** | 15+ | OpenTelemetry-integratie, realtime quotabewaking, kostenregistratie per model |
| 🔄 **Provider-integraties** | 20+ | Dynamisch modelregister, cooldowns van providers, Codex met meerdere accounts, parseren van Copilot-quota |
| **Prestaties** | 15+ | Dubbele cachelaag, promptcache, responscache, streaming keepalive, batch-API |
| 🌐 **Ecosysteem** | 10+ | WebSocket API, configuratie hot-reload, gedistribueerde configuratieopslag, commerciële modus |
| Categorie | Geplande functies | Hoogtepunten |
| ------------------------------- | ------------------------------------------------------ | --------------------------------------------------------------------------------------------------------- |
| 🎯 **Endpoint-Aware Models** | — | Custom models declare supported endpoints + API format |
| 🧠 **Routing en intelligentie** | 25+ | Routering met de laagste latentie, op tags gebaseerde routering, quota-preflight, P2C-accountselectie |
| 🔒 **Beveiliging en naleving** | 20+ | SSRF-verharding, cloaking van inloggegevens, snelheidslimiet per eindpunt, scoping van beheersleutels |
| 📊 **Waarneembaarheid** | 15+ | OpenTelemetry-integratie, realtime quotabewaking, kostenregistratie per model |
| 🔄 **Provider-integraties** | 20+ | Dynamisch modelregister, cooldowns van providers, Codex met meerdere accounts, parseren van Copilot-quota |
| **Prestaties** | 15+ | Dubbele cachelaag, promptcache, responscache, streaming keepalive, batch-API |
| 🌐 **Ecosysteem** | 10+ | WebSocket API, configuratie hot-reload, gedistribueerde configuratieopslag, commerciële modus |
### 🔜 Binnenkort beschikbaar
+1
View File
@@ -864,6 +864,7 @@ npm run electron:build:linux # Linux (.AppImage)
| Funksjon | Hva det gjør |
| --------------------------------- | ----------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Automatisk åpning/lukking per leverandør med konfigurerbare terskler |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-tordenflokk** | Mutex + semaforhastighetsgrense for API-nøkkelleverandører |
| 🧠 **Semantisk cache** | To-lags cache (signatur + semantisk) reduserer kostnader og ventetid |
| ⚡ **Be om idempotens** | 5s dedup-vindu for dupliserte forespørsler |
+2 -1
View File
@@ -289,7 +289,7 @@ Ang mga tagapagbigay ng AI ay maaaring maging hindi matatag, magbalik ng 5xx na
**Paano ito niresolba ng OmniRoute:**
- **Circuit Breaker per-provider** — Awtomatikong buksan/sarado na may mga na-configure na threshold at cooldown (Sarado/Bukas/Kalahating Bukas)
- **Circuit Breaker per-model** — Awtomatikong buksan/sarado na may mga na-configure na threshold at cooldown (Sarado/Bukas/Kalahating Bukas)
- **Exponential Backoff** — Progressive retry delays
- **Anti-Thundering Herd** — Mutex + semaphore na proteksyon laban sa kasabay na muling pagsubok na mga bagyo
- **Combo Fallback Chains** — Kung nabigo ang pangunahing provider, awtomatikong mahuhulog sa chain nang walang interbensyon
@@ -864,6 +864,7 @@ npm run electron:build:linux # Linux (.AppImage)
| Tampok | Ano ang Ginagawa Nito |
| ----------------------------------------- | ------------------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Awtomatikong buksan/isara ang bawat provider na may mga na-configure na threshold |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-Thundering Herd** | Mutex + semaphore rate-limit para sa mga API key provider |
| 🧠 **Semantic Cache** | Binabawasan ng two-tier na cache (pirma + semantiko) ang gastos at latency |
| ⚡ **Humiling ng Idempotency** | 5s dedup window para sa mga duplicate na kahilingan |
+9 -8
View File
@@ -1495,14 +1495,15 @@ Jeśli chcesz uzyskać dostęp do **podręcznika URL**:
OmniRoute ma **ponad 210 funkcji zaplanowanych** w wielu fazach rozwoju. Oto kluczowe obszary:
| Kategoria | Planowane funkcje | Najważniejsze |
| -------------------------------- | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| 🧠 **Routing i inteligencja** | 25+ | Routing z najmniejszym opóźnieniem, routing oparty na tagach, wstępna inspekcja przydziału, wybór konta P2C |
| 🔒 **Bezpieczeństwo i zgodność** | 20+ | Wzmocnienie SSRF, maskowanie poświadczeń, limit szybkości na punkt końcowy, zakres kluczy zarządzania |
| 📊 **Obserwowalność** | 15+ | Integracja OpenTelemetry, monitorowanie kwot w czasie rzeczywistym, śledzenie kosztów według modelu |
| 🔄 **Integracja dostawców** | 20+ | Rejestr modeli dynamicznych, czasy odnowienia dostawcy, Kodeks dla wielu kont, analiza przydziału Copilot |
| **Wydajność** | 15+ | Podwójna warstwa pamięci podręcznej, pamięć podręczna podpowiedzi, pamięć podręczna odpowiedzi, utrzymywanie transmisji strumieniowej, wsadowe API |
| 🌐 **Ekosystem** | 10+ | WebSocket API, ładowanie konfiguracji na gorąco, rozproszony magazyn konfiguracji, tryb komercyjny |
| Kategoria | Planowane funkcje | Najważniejsze |
| -------------------------------- | ------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| 🎯 **Endpoint-Aware Models** | — | Custom models declare supported endpoints + API format |
| 🧠 **Routing i inteligencja** | 25+ | Routing z najmniejszym opóźnieniem, routing oparty na tagach, wstępna inspekcja przydziału, wybór konta P2C |
| 🔒 **Bezpieczeństwo i zgodność** | 20+ | Wzmocnienie SSRF, maskowanie poświadczeń, limit szybkości na punkt końcowy, zakres kluczy zarządzania |
| 📊 **Obserwowalność** | 15+ | Integracja OpenTelemetry, monitorowanie kwot w czasie rzeczywistym, śledzenie kosztów według modelu |
| 🔄 **Integracja dostawców** | 20+ | Rejestr modeli dynamicznych, czasy odnowienia dostawcy, Kodeks dla wielu kont, analiza przydziału Copilot |
| **Wydajność** | 15+ | Podwójna warstwa pamięci podręcznej, pamięć podręczna podpowiedzi, pamięć podręczna odpowiedzi, utrzymywanie transmisji strumieniowej, wsadowe API |
| 🌐 **Ekosystem** | 10+ | WebSocket API, ładowanie konfiguracji na gorąco, rozproszony magazyn konfiguracji, tryb komercyjny |
### 🔜 Już wkrótce
+3 -2
View File
@@ -289,7 +289,7 @@ Provedores de IA podem ficar instáveis, retornar erro 5xx ou atingir limites te
**Como o OmniRoute resolve isso:**
- **Circuit Breaker por provedor** — Abre/fecha automaticamente com limiares e cooldown configuráveis (Closed/Open/Half-Open)
- **Circuit Breaker por modelo** — Abre/fecha automaticamente com limiares e cooldown configuráveis (Closed/Open/Half-Open)
- **Exponential Backoff** — Atrasos progressivos de retry
- **Anti-Thundering Herd** — Proteção com mutex + semáforo contra tempestade de retries concorrentes
- **Cadeias de Fallback em Combo** — Se o primário falhar, avança automaticamente na cadeia sem intervenção
@@ -871,7 +871,8 @@ Por que isso é relevante:
| Funcionalidade | O que Faz |
| ----------------------------------- | ----------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Auto-abertura/fechamento por provedor com limites configuráveis |
| 🔌 **Circuit Breaker** | Trip/recover por modelo com limites configuráveis |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-Thundering Herd** | Mutex + semáforo rate-limit para provedores com API key |
| 🧠 **Cache Semântico** | Cache de duas camadas (assinatura + semântico) reduz custo e latência |
| ⚡ **Idempotência de Requisição** | Janela de dedup de 5s para requisições duplicadas |
+1
View File
@@ -294,6 +294,7 @@ Os provedores de IA podem ficar instáveis, retornar erros 5xx ou atingir limite
- **Rebanho Anti-Trovão** — Proteção Mutex + semáforo contra tempestades de novas tentativas simultâneas
- **Combo Fallback Chains** — Se o provedor primário falhar, ele cairá automaticamente na cadeia sem intervenção
- **Combo Circuit Breaker** — Desativa automaticamente provedores com falha em uma cadeia de combinação
- **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Health Dashboard** — Monitoramento de tempo de atividade, estados de disjuntores, bloqueios, estatísticas de cache, latência p50/p95/p99
</details>
+2
View File
@@ -290,10 +290,12 @@ Furnizorii de AI pot deveni instabili, pot returna erori 5xx sau pot atinge limi
**Cum o rezolvă OmniRoute:**
- **Circuit Breaker per furnizor** - Deschidere/închidere automată cu praguri configurabile și răcire (Închis/Deschis/Pe jumătate deschis)
- **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Backoff exponențial** — Întârzieri progresive ale reîncercării
- **Anti-Thundering Herd** — Mutex + protecție semafor împotriva furtunilor concurente de reîncercare
- **Combo Fallback Chains** — Dacă furnizorul principal eșuează, trece automat prin lanț fără nicio intervenție
- **Combo Circuit Breaker** — Dezactivează automat furnizorii care eșuează dintr-un lanț combinat
- **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Tabloul de bord pentru sănătate** — Monitorizare timp de funcționare, stări întrerupătoare de circuit, blocări, statistici cache, latență p50/p95/p99
</details>
+1
View File
@@ -864,6 +864,7 @@ npm run electron:build:linux # Linux (.AppImage)
| Функция | Что делает |
| -------------------------------- | ---------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Авто-открытие/закрытие по провайдеру с настраиваемыми порогами |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-Thundering Herd** | Mutex + семафор для API key провайдеров |
| 🧠 **Семантический кеш** | Двухуровневый кеш (сигнатура + семантика) снижает стоимость |
| ⚡ **Идемпотентность запросов** | 5с окно дедупликации для дублирующихся запросов |
+1
View File
@@ -295,6 +295,7 @@ Poskytovatelia AI sa môžu stať nestabilnými, vrátiť chyby 5xx alebo dosiah
- **Anti-Thundering Herd** - ochrana Mutex + semafor proti súbežným opakovaným búrkam
- **Combo Fallback Chains** Ak primárny poskytovateľ zlyhá, automaticky prepadne reťazcom bez akéhokoľvek zásahu
- **Combo Circuit Breaker** Automaticky deaktivuje zlyhávajúcich poskytovateľov v rámci kombinovaného reťazca
- **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
**Health Dashboard** Monitorovanie dostupnosti, stavy ističov, blokovania, štatistiky vyrovnávacej pamäte, latencia p50/p95/p99
</details>
+1
View File
@@ -864,6 +864,7 @@ npm run electron:build:linux # Linux (.AppImage)
| Funktion | Vad det gör |
| -------------------------------------- | --------------------------------------------------------------------------------- |
| 🔌 **Circuit Breaker** | Autoöppna/stäng per leverantör med konfigurerbara trösklar |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| 🛡️ **Anti-ånflock** | Mutex + semaforhastighetsgräns för API-nyckelleverantörer |
| 🧠 **Semantisk cache** | Tvåskiktscache (signatur + semantisk) minskar kostnaden och fördröjningen |
| ⚡ **Begär idempotens** | 5s dedup-fönster för dubblettförfrågningar |
+1
View File
@@ -294,6 +294,7 @@ OpenAI ใช้รูปแบบหนึ่ง Claude (Anthropic) ใช้
- **Anti-Thundering Herd** — Mutex + การป้องกันเซมาฟอร์จากพายุที่ลองใหม่พร้อมกัน
- **Combo Fallback Chains** — หากผู้ให้บริการหลักล้มเหลว จะตกผ่านห่วงโซ่โดยอัตโนมัติโดยไม่มีการแทรกแซง
- **Combo Circuit Breaker** — ปิดการใช้งานผู้ให้บริการที่ล้มเหลวภายในคอมโบเชนโดยอัตโนมัติ
- **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **แดชบอร์ดสุขภาพ** — การตรวจสอบสถานะการออนไลน์ สถานะของเซอร์กิตเบรกเกอร์ การล็อก สถิติแคช เวลาแฝง p50/p95/p99
</details>
+1
View File
@@ -294,6 +294,7 @@ OpenAI використовує один формат, Claude (Anthropic) вик
- **Anti-Thundering Herd** — Mutex + захист семафора від одночасних повторних штормів
- **Комбіновані запасні ланцюги** — якщо основний постачальник виходить з ладу, автоматично проходить через ланцюжок без втручання.
- **Combo Circuit Breaker** — автоматично вимикає несправні постачальники в комбінованому ланцюжку
- **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Health Dashboard** — Моніторинг безвідмовної роботи, стани автоматичного вимикача, блокування, статистика кешу, затримка p50/p95/p99
</details>
+1
View File
@@ -294,6 +294,7 @@ Các nhà cung cấp AI có thể trở nên không ổn định, trả về l
- **Bầy chống sấm sét** — Mutex + bảo vệ semaphore chống lại các cơn bão thử lại đồng thời
- **Chuỗi dự phòng kết hợp** — Nếu nhà cung cấp chính không thành công, nó sẽ tự động rơi qua chuỗi mà không cần can thiệp
- **Combo Circuit Breaker** — Tự động vô hiệu hóa các nhà cung cấp bị lỗi trong chuỗi kết hợp
- **Endpoint-Aware Models** — Custom models declare supported endpoints + API format
- **Bảng thông tin sức khỏe** — Giám sát thời gian hoạt động, trạng thái ngắt mạch, khóa, số liệu thống kê bộ nhớ đệm, độ trễ p50/p95/p99
</details>
+10 -9
View File
@@ -1348,15 +1348,16 @@ codex "your prompt"
## 📖 文档
| 文档 | 描述 |
| ----------------------------------- | ---------------------------- |
| [用户指南](docs/USER_GUIDE.md) | 提供商、组合、CLI 集成、部署 |
| [API 参考](docs/API_REFERENCE.md) | 所有端点及示例 |
| [故障排除](docs/TROUBLESHOOTING.md) | 常见问题和解决方案 |
| [架构](docs/ARCHITECTURE.md) | 系统架构和内部机制 |
| [贡献指南](CONTRIBUTING.md) | 开发设置和指南 |
| [OpenAPI 规范](docs/openapi.yaml) | OpenAPI 3.0 规范 |
| [安全策略](SECURITY.md) | 漏洞报告和安全实践 |
| 文档 | 描述 |
| ----------------------------------- | ------------------------------------------------------ |
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
| [用户指南](docs/USER_GUIDE.md) | 提供商、组合、CLI 集成、部署 |
| [API 参考](docs/API_REFERENCE.md) | 所有端点及示例 |
| [故障排除](docs/TROUBLESHOOTING.md) | 常见问题和解决方案 |
| [架构](docs/ARCHITECTURE.md) | 系统架构和内部机制 |
| [贡献指南](CONTRIBUTING.md) | 开发设置和指南 |
| [OpenAPI 规范](docs/openapi.yaml) | OpenAPI 3.0 规范 |
| [安全策略](SECURITY.md) | 漏洞报告和安全实践 |
---
+121
View File
@@ -26,10 +26,12 @@ const {
nativeImage,
shell,
session,
Notification,
} = require("electron");
const path = require("path");
const { spawn } = require("child_process");
const fs = require("fs");
const { autoUpdater } = require("electron-updater");
// ── Single Instance Lock ───────────────────────────────────
const gotTheLock = app.requestSingleInstanceLock();
@@ -62,6 +64,11 @@ let serverPort = 20128;
const getServerUrl = () => `http://localhost:${serverPort}`;
// ── Auto-Updater Configuration ──────────────────────────────
// Do not start downloading as soon as an update is found; the download is
// started explicitly through downloadUpdate().
autoUpdater.autoDownload = false;
// NOTE(review): per electron-updater semantics this should install an already
// downloaded update when the app quits — confirm against the library docs.
autoUpdater.autoInstallOnAppQuit = true;
// Route the updater's own log output to the main-process console.
autoUpdater.logger = console;
// ── Helper: Send IPC event to renderer (#5) ────────────────
function sendToRenderer(channel, data) {
if (mainWindow && !mainWindow.isDestroyed()) {
@@ -103,6 +110,77 @@ async function waitForServerExit(proc, timeoutMs = 5000) {
]);
}
// ── Auto-Updater Event Handlers ─────────────────────────────
/**
 * Register listeners on `autoUpdater` that mirror every updater lifecycle
 * event to the renderer on the "update-status" channel and log it.
 *
 * Payloads sent to the renderer:
 *   - { status: "checking" }
 *   - { status: "available" | "not-available" | "downloaded", version }
 *   - { status: "downloading", percent, transferred, total }
 *   - { status: "error", message }
 *
 * When an update has finished downloading and desktop notifications are
 * supported, a notification is shown; clicking it installs the update.
 */
function setupAutoUpdater() {
  autoUpdater.on("checking-for-update", () => {
    sendToRenderer("update-status", { status: "checking" });
    console.log("[Electron] Checking for updates...");
  });

  autoUpdater.on("update-available", (info) => {
    sendToRenderer("update-status", { status: "available", version: info.version });
    console.log("[Electron] Update available:", info.version);
  });

  autoUpdater.on("update-not-available", (info) => {
    sendToRenderer("update-status", { status: "not-available", version: info.version });
    console.log("[Electron] No update available");
  });

  autoUpdater.on("download-progress", (progress) => {
    sendToRenderer("update-status", {
      status: "downloading",
      // Rounded so the renderer receives a whole-number percentage.
      percent: Math.round(progress.percent),
      transferred: progress.transferred,
      total: progress.total,
    });
  });

  autoUpdater.on("update-downloaded", (info) => {
    sendToRenderer("update-status", { status: "downloaded", version: info.version });
    console.log("[Electron] Update downloaded:", info.version);

    if (Notification.isSupported()) {
      const notification = new Notification({
        title: "OmniRoute Update Ready",
        body: `Version ${info.version} is ready to install. Click to restart.`,
      });
      notification.on("click", () => {
        // Go through installUpdate() rather than calling quitAndInstall()
        // directly so the spawned server process is stopped first, exactly
        // like the "install-update" IPC path does.
        installUpdate();
      });
      notification.show();
    }
  });

  autoUpdater.on("error", (error) => {
    sendToRenderer("update-status", { status: "error", message: error.message });
    console.error("[Electron] Update error:", error);
  });
}
/**
 * Ask the updater to look for a new release.
 *
 * Outside dev mode this awaits `autoUpdater.checkForUpdates()`; a rejection
 * propagates to the caller. In dev mode no check is made.
 *
 * @param {boolean} [silent=false] Only relevant in dev mode: when false, the
 *   renderer is told via "update-status" that updates are disabled; when true
 *   the skip is only logged.
 * @returns {Promise<void>}
 */
async function checkForUpdates(silent = false) {
  // Production path: delegate to the updater.
  if (!isDev) {
    await autoUpdater.checkForUpdates();
    return;
  }

  // Dev path: never hit the update server.
  console.log("[Electron] Dev mode — skipping auto-update");
  if (silent) {
    return;
  }
  sendToRenderer("update-status", { status: "error", message: "Updates disabled in dev mode" });
}
/**
 * Start downloading the available update and wait for the updater call to
 * settle. Needed because automatic download is disabled in the updater
 * configuration. A rejection from the updater propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function downloadUpdate() {
await autoUpdater.downloadUpdate();
}
/**
 * Install a downloaded update by quitting and relaunching through the updater.
 *
 * If a server process is tracked in `nextServer`, it is sent SIGTERM and the
 * reference is cleared before the updater is asked to quit and install.
 * NOTE(review): this does not wait for the process to actually exit before
 * quitAndInstall() runs — confirm that is acceptable.
 */
function installUpdate() {
// Stop the spawned server first (presumably the bundled Next.js server — verify).
if (nextServer) {
nextServer.kill("SIGTERM");
nextServer = null;
}
autoUpdater.quitAndInstall();
}
// ── Content Security Policy (#15) ──────────────────────────
function setupContentSecurityPolicy() {
session.defaultSession.webRequest.onHeadersReceived((details, callback) => {
@@ -236,6 +314,11 @@ function createTray() {
],
},
{ type: "separator" },
{
label: "Check for Updates",
click: () => checkForUpdates(false),
},
{ type: "separator" },
{
label: "Quit",
click: () => {
@@ -391,6 +474,36 @@ function setupIpcHandlers() {
});
ipcMain.on("window-close", () => mainWindow?.close());
// Auto-update IPC handlers
ipcMain.handle("check-for-updates", async () => {
try {
await checkForUpdates(false);
return { success: true };
} catch (error) {
console.error("[Electron] Check for updates failed:", error);
sendToRenderer("update-status", { status: "error", message: error.message });
return { success: false, error: error.message };
}
});
ipcMain.handle("download-update", async () => {
try {
await downloadUpdate();
return { success: true };
} catch (error) {
console.error("[Electron] Download update failed:", error);
sendToRenderer("update-status", { status: "error", message: error.message });
return { success: false, error: error.message };
}
});
ipcMain.handle("install-update", () => {
installUpdate();
// No return value — app will quit and restart
});
ipcMain.handle("get-app-version", () => app.getVersion());
}
// ── App Lifecycle ──────────────────────────────────────────
@@ -407,6 +520,14 @@ app.whenReady().then(async () => {
createWindow();
createTray();
setupIpcHandlers();
setupAutoUpdater();
// Check for updates after a short delay (don't block startup)
if (!isDev) {
setTimeout(() => {
checkForUpdates(true);
}, 3000);
}
// macOS: recreate window when dock icon clicked
app.on("activate", () => {
+8 -1
View File
@@ -15,7 +15,9 @@
"build:linux": "electron-builder --linux",
"pack": "electron-builder --dir"
},
"dependencies": {},
"dependencies": {
"electron-updater": "^6.8.3"
},
"devDependencies": {
"electron": "^40.6.1",
"electron-builder": "^25.1.8"
@@ -28,6 +30,11 @@
"output": "dist-electron",
"buildResources": "assets"
},
"publish": {
"provider": "github",
"owner": "diegosouzapw",
"repo": "OmniRoute"
},
"files": [
"main.js",
"preload.js",
+18 -2
View File
@@ -13,9 +13,18 @@ const { contextBridge, ipcRenderer } = require("electron");
// ── Channel Whitelist ──────────────────────────────────────
const VALID_CHANNELS = {
invoke: ["get-app-info", "open-external", "get-data-dir", "restart-server"],
invoke: [
"get-app-info",
"open-external",
"get-data-dir",
"restart-server",
"check-for-updates",
"download-update",
"install-update",
"get-app-version",
],
send: ["window-minimize", "window-maximize", "window-close"],
receive: ["server-status", "port-changed"],
receive: ["server-status", "port-changed", "update-status"],
};
// ── Fix #16: Generic IPC wrappers ──────────────────────────
@@ -48,6 +57,12 @@ contextBridge.exposeInMainWorld("electronAPI", {
openExternal: (url) => safeInvoke("open-external", url),
getDataDir: () => safeInvoke("get-data-dir"),
restartServer: () => safeInvoke("restart-server"),
getAppVersion: () => safeInvoke("get-app-version"),
// ── Auto-Update ──────────────────────────────────────────
checkForUpdates: () => safeInvoke("check-for-updates"),
downloadUpdate: () => safeInvoke("download-update"),
installUpdate: () => safeInvoke("install-update"),
// ── Send (fire-and-forget) ───────────────────────────────
minimizeWindow: () => safeSend("window-minimize"),
@@ -58,6 +73,7 @@ contextBridge.exposeInMainWorld("electronAPI", {
// Fix #6: Returns a disposer function for precise cleanup
onServerStatus: (callback) => safeOn("server-status", callback),
onPortChanged: (callback) => safeOn("port-changed", callback),
onUpdateStatus: (callback) => safeOn("update-status", callback),
// ── Static Properties ────────────────────────────────────
isElectron: true,
+1 -1
View File
@@ -6,7 +6,7 @@ const withNextIntl = createNextIntlPlugin("./src/i18n/request.ts");
const nextConfig = {
turbopack: {},
output: "standalone",
serverExternalPackages: ["better-sqlite3"],
serverExternalPackages: ["better-sqlite3", "zod"],
transpilePackages: ["@omniroute/open-sse"],
allowedDevOrigins: ["192.168.*"],
typescript: {
+50 -6
View File
@@ -30,9 +30,23 @@ import {
* @param {object} options.body - Request body
* @param {object} options.credentials - Provider credentials { apiKey, accessToken }
* @param {object} options.log - Logger
* @param {string} [options.resolvedProvider] - Pre-resolved provider ID (from route layer custom model resolution)
*/
export async function handleImageGeneration({ body, credentials, log }) {
const { provider, model } = parseImageModel(body.model);
export async function handleImageGeneration({ body, credentials, log, resolvedProvider = null }) {
let provider, model;
if (resolvedProvider) {
// Provider was already resolved by the route layer (custom model from DB)
// Extract model name from the full "provider/model" string
provider = resolvedProvider;
const modelStr = body.model || "";
model = modelStr.startsWith(provider + "/") ? modelStr.slice(provider.length + 1) : modelStr;
} else {
// Standard path: resolve from built-in image registry
const parsed = parseImageModel(body.model);
provider = parsed.provider;
model = parsed.model;
}
if (!provider) {
return {
@@ -43,12 +57,42 @@ export async function handleImageGeneration({ body, credentials, log }) {
}
const providerConfig = getImageProvider(provider);
// For custom models without a built-in provider config, use OpenAI-compatible handler
// with a synthetic config based on the provider's credentials
if (!providerConfig) {
return {
success: false,
status: 400,
error: `Unknown image provider: ${provider}`,
if (!resolvedProvider) {
return {
success: false,
status: 400,
error: `Unknown image provider: ${provider}`,
};
}
// Custom model: use OpenAI-compatible format with provider's base URL
// The credentials were already resolved by the route layer
if (log) {
log.info("IMAGE", `Custom model ${provider}/${model} — using OpenAI-compatible handler`);
}
const syntheticConfig = {
id: provider,
baseUrl:
credentials?.baseUrl ||
`https://generativelanguage.googleapis.com/v1beta/openai/images/generations`,
authType: "apikey",
authHeader: "bearer",
format: "openai",
};
return handleOpenAIImageGeneration({
model,
provider,
providerConfig: syntheticConfig,
body,
credentials,
log,
});
}
// Route to format-specific handler
+6 -6
View File
@@ -310,7 +310,8 @@ export async function handleComboChat({
const parsed = parseModel(modelStr);
const provider = parsed.provider || parsed.providerAlias || "unknown";
const profile = getProviderProfile(provider);
const breaker = getCircuitBreaker(`combo:${provider}`, {
const breakerKey = `combo:${modelStr}`;
const breaker = getCircuitBreaker(breakerKey, {
failureThreshold: profile.circuitBreakerThreshold,
resetTimeout: profile.circuitBreakerReset,
});
@@ -440,8 +441,7 @@ export async function handleComboChat({
// Early exit: check if all models have breaker OPEN
const allBreakersOpen = orderedModels.every((m) => {
const p = parseModel(m).provider || parseModel(m).providerAlias || "unknown";
return !getCircuitBreaker(`combo:${p}`).canExecute();
return !getCircuitBreaker(`combo:${m}`).canExecute();
});
// All models failed
@@ -532,7 +532,8 @@ async function handleRoundRobinCombo({
const parsed = parseModel(modelStr);
const provider = parsed.provider || parsed.providerAlias || "unknown";
const profile = getProviderProfile(provider);
const breaker = getCircuitBreaker(`combo:${provider}`, {
const breakerKey = `combo:${modelStr}`;
const breaker = getCircuitBreaker(breakerKey, {
failureThreshold: profile.circuitBreakerThreshold,
resetTimeout: profile.circuitBreakerReset,
});
@@ -694,8 +695,7 @@ async function handleRoundRobinCombo({
// Early exit: check if all models have breaker OPEN
const allBreakersOpen = orderedModels.every((m) => {
const p = parseModel(m).provider || parseModel(m).providerAlias || "unknown";
return !getCircuitBreaker(`combo:${p}`).canExecute();
return !getCircuitBreaker(`combo:${m}`).canExecute();
});
if (allBreakersOpen) {
+19 -16
View File
@@ -3,6 +3,7 @@
*/
import { PROVIDERS } from "../config/constants.ts";
import { safePercentage } from "@/shared/utils/formatting";
// GitHub API config
const GITHUB_CONFIG = {
@@ -34,6 +35,7 @@ const CLAUDE_CONFIG = {
oauthUsageUrl: "https://api.anthropic.com/api/oauth/usage",
usageUrl: "https://api.anthropic.com/v1/organizations/{org_id}/usage",
settingsUrl: "https://api.anthropic.com/v1/settings",
apiVersion: "2023-06-01",
};
type JsonRecord = Record<string, unknown>;
@@ -469,7 +471,7 @@ async function getClaudeUsage(accessToken) {
headers: {
Authorization: `Bearer ${accessToken}`,
"anthropic-beta": "oauth-2025-04-20",
"anthropic-version": "2023-06-01",
"anthropic-version": CLAUDE_CONFIG.apiVersion,
},
});
@@ -477,36 +479,34 @@ async function getClaudeUsage(accessToken) {
const data = await oauthResponse.json();
const quotas: Record<string, any> = {};
// utilization = percentage USED (e.g., 22 means 22% used, 78% remaining)
// utilization = percentage REMAINING (e.g., 90 means 90% remaining, 10% used)
const hasUtilization = (window: any) =>
window && typeof window === "object" && safePercentage(window.utilization) !== undefined;
const createQuotaObject = (window: any) => {
const used = window?.utilization ?? 0;
const remaining = 100 - used;
const remaining = safePercentage(window.utilization) as number;
const used = 100 - remaining;
return {
used,
total: 100,
remaining,
resetAt: parseResetTime(window?.resets_at),
resetAt: parseResetTime(window.resets_at),
remainingPercentage: remaining,
unlimited: false,
};
};
if (data.five_hour && typeof data.five_hour === "object") {
if (hasUtilization(data.five_hour)) {
quotas["session (5h)"] = createQuotaObject(data.five_hour);
}
if (data.seven_day && typeof data.seven_day === "object") {
if (hasUtilization(data.seven_day)) {
quotas["weekly (7d)"] = createQuotaObject(data.seven_day);
}
// Parse model-specific weekly windows (e.g., seven_day_sonnet, seven_day_opus)
for (const [key, value] of Object.entries(data)) {
if (
key.startsWith("seven_day_") &&
key !== "seven_day" &&
value &&
typeof value === "object"
) {
if (key.startsWith("seven_day_") && key !== "seven_day" && hasUtilization(value)) {
const modelName = key.replace("seven_day_", "");
quotas[`weekly ${modelName} (7d)`] = createQuotaObject(value);
}
@@ -519,7 +519,10 @@ async function getClaudeUsage(accessToken) {
};
}
// Fallback: Try legacy settings/org endpoint (for API key users with org admin access)
// Fallback: OAuth endpoint returned non-OK, try legacy settings/org endpoint
console.warn(
`[Claude Usage] OAuth endpoint returned ${oauthResponse.status}, falling back to legacy`
);
return await getClaudeUsageLegacy(accessToken);
} catch (error) {
return { message: `Claude connected. Unable to fetch usage: ${(error as any).message}` };
@@ -536,7 +539,7 @@ async function getClaudeUsageLegacy(accessToken) {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"anthropic-version": "2023-06-01",
"anthropic-version": CLAUDE_CONFIG.apiVersion,
},
});
@@ -550,7 +553,7 @@ async function getClaudeUsageLegacy(accessToken) {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"anthropic-version": "2023-06-01",
"anthropic-version": CLAUDE_CONFIG.apiVersion,
},
}
);
+9 -5
View File
@@ -185,15 +185,19 @@ export function prepareClaudeRequest(body, provider = null) {
}
}
// 3. Tools: remove all cache_control, add only to last tool with ttl 1h
// 3. Tools: remove all cache_control, add only to last non-deferred tool with ttl 1h
// Tools with defer_loading=true cannot have cache_control (API rejects it)
if (body.tools && Array.isArray(body.tools)) {
body.tools = body.tools.map((tool, i) => {
body.tools = body.tools.map((tool) => {
const { cache_control, ...rest } = tool;
if (i === body.tools.length - 1) {
return { ...rest, cache_control: { type: "ephemeral", ttl: "1h" } };
}
return rest;
});
for (let i = body.tools.length - 1; i >= 0; i--) {
if (!body.tools[i].defer_loading) {
body.tools[i].cache_control = { type: "ephemeral", ttl: "1h" };
break;
}
}
}
return body;
@@ -275,30 +275,11 @@ export function openaiToOpenAIResponsesRequest(
// Convert assistant messages
if (role === "assistant") {
// Add reasoning content before assistant output
if (msg.reasoning_content) {
input.push({
type: "reasoning",
id: `reasoning_${input.length}`,
summary: [{ type: "summary_text", text: toString(msg.reasoning_content) }],
});
}
// Skip reasoning_content — OpenAI Responses API requires server-generated
// rs_* IDs for reasoning items. Synthesizing client-side IDs (e.g. reasoning_N)
// causes 400 errors from Responses-compatible upstreams. (#224)
// Handle thinking blocks in array content
if (Array.isArray(msg.content)) {
for (const blockValue of msg.content) {
const block = toRecord(blockValue);
if (block.type === "thinking" || block.type === "redacted_thinking") {
input.push({
type: "reasoning",
id: `reasoning_${input.length}`,
summary: [
{ type: "summary_text", text: toString(block.thinking || block.data, "...") },
],
});
}
}
}
// Skip thinking blocks in array content — same rs_* ID constraint applies
// Build assistant output content
const outputContent: unknown[] = [];
@@ -175,8 +175,13 @@ export function openaiToClaudeRequest(model, body, stream) {
};
});
if (result.tools.length > 0) {
result.tools[result.tools.length - 1].cache_control = { type: "ephemeral", ttl: "1h" };
// Add cache_control to last tool that doesn't have defer_loading
// Tools with defer_loading=true cannot have cache_control (API rejects it)
for (let i = result.tools.length - 1; i >= 0; i--) {
if (!result.tools[i].defer_loading) {
result.tools[i].cache_control = { type: "ephemeral", ttl: "1h" };
break;
}
}
}
+6 -15
View File
@@ -1,12 +1,12 @@
{
"name": "omniroute",
"version": "2.0.0",
"version": "2.0.7",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "omniroute",
"version": "2.0.0",
"version": "2.0.7",
"hasInstallScript": true,
"license": "MIT",
"workspaces": [
@@ -6596,12 +6596,12 @@
}
},
"node_modules/express-rate-limit": {
"version": "8.2.1",
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.2.1.tgz",
"integrity": "sha512-PCZEIEIxqwhzw4KF0n7QF4QqruVTcF73O5kFKUnGOyjbCCgizBBiFaYpd/fnBLUMPw/BWw9OsiN7GgrNYr7j6g==",
"version": "8.3.0",
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.0.tgz",
"integrity": "sha512-KJzBawY6fB9FiZGdE/0aftepZ91YlaGIrV8vgblRM3J8X+dHx/aiowJWwkx6LIGyuqGiANsjSwwrbb8mifOJ4Q==",
"license": "MIT",
"dependencies": {
"ip-address": "10.0.1"
"ip-address": "10.1.0"
},
"engines": {
"node": ">= 16"
@@ -6613,15 +6613,6 @@
"express": ">= 4.11"
}
},
"node_modules/express-rate-limit/node_modules/ip-address": {
"version": "10.0.1",
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.0.1.tgz",
"integrity": "sha512-NWv9YLW4PoW2B7xtzaS3NCot75m6nK7Icdv0o3lfMceJVRfSoQwqD4wEH5rLwoKJwUiZ/rfpiVBhnaF0FK4HoA==",
"license": "MIT",
"engines": {
"node": ">= 12"
}
},
"node_modules/fast-copy": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/fast-copy/-/fast-copy-4.0.2.tgz",
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "omniroute",
"version": "2.0.0",
"version": "2.0.8",
"description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
"type": "module",
"bin": {
@@ -1338,6 +1338,8 @@ function CustomModelsSection({ providerId, providerAlias, copied, onCopy }) {
const [customModels, setCustomModels] = useState([]);
const [newModelId, setNewModelId] = useState("");
const [newModelName, setNewModelName] = useState("");
const [newApiFormat, setNewApiFormat] = useState("chat-completions");
const [newEndpoints, setNewEndpoints] = useState(["chat"]);
const [adding, setAdding] = useState(false);
const [loading, setLoading] = useState(true);
@@ -1370,11 +1372,15 @@ function CustomModelsSection({ providerId, providerAlias, copied, onCopy }) {
provider: providerId,
modelId: newModelId.trim(),
modelName: newModelName.trim() || undefined,
apiFormat: newApiFormat,
supportedEndpoints: newEndpoints,
}),
});
if (res.ok) {
setNewModelId("");
setNewModelName("");
setNewApiFormat("chat-completions");
setNewEndpoints(["chat"]);
await fetchCustomModels();
}
} catch (e) {
@@ -1407,38 +1413,89 @@ function CustomModelsSection({ providerId, providerAlias, copied, onCopy }) {
<p className="text-xs text-text-muted mb-3">{t("customModelsHint")}</p>
{/* Add form */}
<div className="flex items-end gap-2 mb-3">
<div className="flex-1">
<label htmlFor="custom-model-id" className="text-xs text-text-muted mb-1 block">
{t("modelId")}
</label>
<input
id="custom-model-id"
type="text"
value={newModelId}
onChange={(e) => setNewModelId(e.target.value)}
onKeyDown={(e) => e.key === "Enter" && handleAdd()}
placeholder={t("customModelPlaceholder")}
className="w-full px-3 py-2 text-sm border border-border rounded-lg bg-background focus:outline-none focus:border-primary"
/>
<div className="flex flex-col gap-3 mb-3">
<div className="flex items-end gap-2">
<div className="flex-1">
<label htmlFor="custom-model-id" className="text-xs text-text-muted mb-1 block">
{t("modelId")}
</label>
<input
id="custom-model-id"
type="text"
value={newModelId}
onChange={(e) => setNewModelId(e.target.value)}
onKeyDown={(e) => e.key === "Enter" && handleAdd()}
placeholder={t("customModelPlaceholder")}
className="w-full px-3 py-2 text-sm border border-border rounded-lg bg-background focus:outline-none focus:border-primary"
/>
</div>
<div className="w-40">
<label htmlFor="custom-model-name" className="text-xs text-text-muted mb-1 block">
{t("displayName")}
</label>
<input
id="custom-model-name"
type="text"
value={newModelName}
onChange={(e) => setNewModelName(e.target.value)}
onKeyDown={(e) => e.key === "Enter" && handleAdd()}
placeholder={t("optional")}
className="w-full px-3 py-2 text-sm border border-border rounded-lg bg-background focus:outline-none focus:border-primary"
/>
</div>
<Button size="sm" icon="add" onClick={handleAdd} disabled={!newModelId.trim() || adding}>
{adding ? t("adding") : t("add")}
</Button>
</div>
<div className="w-40">
<label htmlFor="custom-model-name" className="text-xs text-text-muted mb-1 block">
{t("displayName")}
</label>
<input
id="custom-model-name"
type="text"
value={newModelName}
onChange={(e) => setNewModelName(e.target.value)}
onKeyDown={(e) => e.key === "Enter" && handleAdd()}
placeholder={t("optional")}
className="w-full px-3 py-2 text-sm border border-border rounded-lg bg-background focus:outline-none focus:border-primary"
/>
{/* API Format + Supported Endpoints */}
<div className="flex items-end gap-4 flex-wrap">
<div className="w-48">
<label htmlFor="custom-api-format" className="text-xs text-text-muted mb-1 block">
API Format
</label>
<select
id="custom-api-format"
value={newApiFormat}
onChange={(e) => setNewApiFormat(e.target.value)}
className="w-full px-3 py-2 text-sm border border-border rounded-lg bg-background focus:outline-none focus:border-primary"
>
<option value="chat-completions">Chat Completions</option>
<option value="responses">Responses API</option>
</select>
</div>
<div className="flex-1">
<span className="text-xs text-text-muted mb-1 block">Supported Endpoints</span>
<div className="flex items-center gap-3">
{["chat", "embeddings", "images", "audio"].map((ep) => (
<label
key={ep}
className="flex items-center gap-1.5 text-xs text-text-main cursor-pointer"
>
<input
type="checkbox"
checked={newEndpoints.includes(ep)}
onChange={(e) => {
if (e.target.checked) {
setNewEndpoints((prev) => [...prev, ep]);
} else {
setNewEndpoints((prev) => prev.filter((x) => x !== ep));
}
}}
className="rounded border-border"
/>
{ep === "chat"
? "💬 Chat"
: ep === "embeddings"
? "📐 Embeddings"
: ep === "images"
? "🖼️ Images"
: "🔊 Audio"}
</label>
))}
</div>
</div>
</div>
<Button size="sm" icon="add" onClick={handleAdd} disabled={!newModelId.trim() || adding}>
{adding ? t("adding") : t("add")}
</Button>
</div>
{/* List */}
@@ -1457,7 +1514,7 @@ function CustomModelsSection({ providerId, providerAlias, copied, onCopy }) {
<span className="material-symbols-outlined text-base text-primary">tune</span>
<div className="flex-1 min-w-0">
<p className="text-sm font-medium truncate">{model.name || model.id}</p>
<div className="flex items-center gap-1 mt-1">
<div className="flex items-center gap-1 mt-1 flex-wrap">
<code className="text-xs text-text-muted font-mono bg-sidebar px-1.5 py-0.5 rounded">
{fullModel}
</code>
@@ -1470,6 +1527,26 @@ function CustomModelsSection({ providerId, providerAlias, copied, onCopy }) {
{copied === copyKey ? "check" : "content_copy"}
</span>
</button>
{model.apiFormat === "responses" && (
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-blue-500/15 text-blue-400 font-medium">
Responses
</span>
)}
{model.supportedEndpoints?.includes("embeddings") && (
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-purple-500/15 text-purple-400 font-medium">
📐 Embed
</span>
)}
{model.supportedEndpoints?.includes("images") && (
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-amber-500/15 text-amber-400 font-medium">
🖼 Images
</span>
)}
{model.supportedEndpoints?.includes("audio") && (
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-green-500/15 text-green-400 font-medium">
🔊 Audio
</span>
)}
</div>
</div>
<button
@@ -150,10 +150,9 @@ export default function ProviderLimitCard({
{!loading && !error && !message && quotas?.length > 0 && (
<div className="space-y-4">
{quotas.map((quota, index) => {
// For Antigravity, use remainingPercentage if available, otherwise calculate
const percentage =
quota.remainingPercentage !== undefined
? Math.round(((quota.total - quota.used) / quota.total) * 100)
? Math.round(quota.remainingPercentage)
: calculatePercentage(quota.used, quota.total);
const unlimited = quota.total === 0 || quota.total === null;
@@ -1,4 +1,5 @@
import { getModelsByProviderId } from "@omniroute/open-sse/config/providerModels.ts";
import { safePercentage } from "@/shared/utils/formatting";
/**
* Format ISO date string to countdown format (inspired by vscode-antigravity-cockpit)
@@ -110,7 +111,7 @@ export function parseQuotaData(provider, data) {
used: quota.used || 0,
total: quota.total || 0,
resetAt: quota.resetAt || null,
remainingPercentage: quota.remainingPercentage,
remainingPercentage: safePercentage(quota.remainingPercentage),
});
});
}
@@ -159,7 +160,7 @@ export function parseQuotaData(provider, data) {
used: quota.used || 0,
total: quota.total || 0,
resetAt: quota.resetAt || null,
remainingPercentage: quota.remainingPercentage,
remainingPercentage: safePercentage(quota.remainingPercentage),
});
});
}
+27 -9
View File
@@ -221,9 +221,15 @@ export async function POST(
let connection: any;
if (tokenData.email) {
const existing = await getProviderConnections({ provider });
const match = existing.find(
(c: any) => c.email === tokenData.email && c.authType === "oauth"
);
const match = existing.find((c: any) => {
if (c.email !== tokenData.email || c.authType !== "oauth") return false;
// For Codex, also check workspaceId to avoid overwriting different workspace connections
if (provider === "codex" && tokenData.providerSpecificData?.workspaceId) {
const existingWorkspace = c.providerSpecificData?.workspaceId;
return existingWorkspace === tokenData.providerSpecificData.workspaceId;
}
return true;
});
const matchId = typeof match?.id === "string" ? match.id : null;
if (matchId) {
connection = await updateProviderConnection(matchId, {
@@ -285,9 +291,15 @@ export async function POST(
let connection: any;
if (result.tokens.email) {
const existing = await getProviderConnections({ provider });
const match = existing.find(
(c: any) => c.email === result.tokens.email && c.authType === "oauth"
);
const match = existing.find((c: any) => {
if (c.email !== result.tokens.email || c.authType !== "oauth") return false;
// For Codex, also check workspaceId to avoid overwriting different workspace connections
if (provider === "codex" && result.tokens.providerSpecificData?.workspaceId) {
const existingWorkspace = c.providerSpecificData?.workspaceId;
return existingWorkspace === result.tokens.providerSpecificData.workspaceId;
}
return true;
});
const matchId = typeof match?.id === "string" ? match.id : null;
if (matchId) {
connection = await updateProviderConnection(matchId, {
@@ -399,9 +411,15 @@ export async function POST(
let connection: any;
if (tokenData.email) {
const existing = await getProviderConnections({ provider });
const match = existing.find(
(c: any) => c.email === tokenData.email && c.authType === "oauth"
);
const match = existing.find((c: any) => {
if (c.email !== tokenData.email || c.authType !== "oauth") return false;
// For Codex, also check workspaceId to avoid overwriting different workspace connections
if (provider === "codex" && tokenData.providerSpecificData?.workspaceId) {
const existingWorkspace = c.providerSpecificData?.workspaceId;
return existingWorkspace === tokenData.providerSpecificData.workspaceId;
}
return true;
});
const matchId = typeof match?.id === "string" ? match.id : null;
if (matchId) {
connection = await updateProviderConnection(matchId, {
+9 -2
View File
@@ -64,9 +64,16 @@ export async function POST(request) {
if (isValidationFailure(validation)) {
return Response.json({ error: validation.error }, { status: 400 });
}
const { provider, modelId, modelName, source } = validation.data;
const { provider, modelId, modelName, source, apiFormat, supportedEndpoints } = validation.data;
const model = await addCustomModel(provider, modelId, modelName, source || "manual");
const model = await addCustomModel(
provider,
modelId,
modelName,
source || "manual",
apiFormat,
supportedEndpoints
);
return Response.json({ model });
} catch (error) {
console.error("Error adding provider model:", error);
+11
View File
@@ -4,6 +4,11 @@ import { getUsageForProvider } from "@omniroute/open-sse/services/usage.ts";
import { getExecutor } from "@omniroute/open-sse/executors/index.ts";
import { syncToCloud } from "@/lib/cloudSync";
import { runWithProxyContext } from "@omniroute/open-sse/utils/proxyFetch.ts";
import { setQuotaCache } from "@/domain/quotaCache";
/**
 * Type guard for plain "record-like" values.
 *
 * @param value - Any value.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, any> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
* Sync to cloud if enabled
@@ -147,6 +152,12 @@ export async function GET(request: Request, { params }: { params: Promise<{ conn
const usage = await runWithProxyContext(proxyInfo?.proxy || null, () =>
getUsageForProvider(connection)
);
// Populate quota cache for quota-aware account selection
if (isRecord(usage?.quotas)) {
setQuotaCache(connectionId, connection.provider, usage.quotas);
}
return Response.json(usage);
} catch (error) {
console.error("[Usage API] Error fetching usage:", error);
+38 -16
View File
@@ -13,6 +13,8 @@ import { enforceApiKeyPolicy } from "@/shared/utils/apiKeyPolicy";
import { v1EmbeddingsSchema } from "@/shared/validation/schemas";
import { isValidationFailure, validateBody } from "@/shared/validation/helpers";
import { getAllCustomModels } from "@/lib/localDb";
/**
* Handle CORS preflight
*/
@@ -30,23 +32,43 @@ export async function OPTIONS() {
* GET /v1/embeddings list available embedding models
*/
export async function GET() {
const models = getAllEmbeddingModels();
return new Response(
JSON.stringify({
object: "list",
data: models.map((m) => ({
id: m.id,
object: "model",
created: Math.floor(Date.now() / 1000),
owned_by: m.provider,
type: "embedding",
dimensions: m.dimensions,
})),
}),
{
headers: { "Content-Type": "application/json" },
const builtInModels = getAllEmbeddingModels();
const timestamp = Math.floor(Date.now() / 1000);
const data = builtInModels.map((m) => ({
id: m.id,
object: "model",
created: timestamp,
owned_by: m.provider,
type: "embedding",
dimensions: m.dimensions,
}));
// Include custom models tagged for embeddings
try {
const customModelsMap = (await getAllCustomModels()) as Record<string, any>;
for (const [providerId, models] of Object.entries(customModelsMap)) {
if (!Array.isArray(models)) continue;
for (const model of models) {
if (!model?.id || !Array.isArray(model.supportedEndpoints)) continue;
if (!model.supportedEndpoints.includes("embeddings")) continue;
const fullId = `${providerId}/${model.id}`;
if (data.some((d) => d.id === fullId)) continue;
data.push({
id: fullId,
object: "model",
created: timestamp,
owned_by: providerId,
type: "embedding",
dimensions: null,
});
}
}
);
} catch {}
return new Response(JSON.stringify({ object: "list", data }), {
headers: { "Content-Type": "application/json" },
});
}
/**
+77 -18
View File
@@ -14,6 +14,8 @@ import { enforceApiKeyPolicy } from "@/shared/utils/apiKeyPolicy";
import { v1ImageGenerationSchema } from "@/shared/validation/schemas";
import { isValidationFailure, validateBody } from "@/shared/validation/helpers";
import { getAllCustomModels } from "@/lib/localDb";
/**
* Handle CORS preflight
*/
@@ -31,23 +33,43 @@ export async function OPTIONS() {
* GET /v1/images/generations list available image models
*/
export async function GET() {
const models = getAllImageModels();
return new Response(
JSON.stringify({
object: "list",
data: models.map((m) => ({
id: m.id,
object: "model",
created: Math.floor(Date.now() / 1000),
owned_by: m.provider,
type: "image",
supported_sizes: m.supportedSizes,
})),
}),
{
headers: { "Content-Type": "application/json" },
const builtInModels = getAllImageModels();
const timestamp = Math.floor(Date.now() / 1000);
const data = builtInModels.map((m) => ({
id: m.id,
object: "model",
created: timestamp,
owned_by: m.provider,
type: "image",
supported_sizes: m.supportedSizes,
}));
// Include custom models tagged for images
try {
const customModelsMap = (await getAllCustomModels()) as Record<string, any>;
for (const [providerId, models] of Object.entries(customModelsMap)) {
if (!Array.isArray(models)) continue;
for (const model of models) {
if (!model?.id || !Array.isArray(model.supportedEndpoints)) continue;
if (!model.supportedEndpoints.includes("images")) continue;
const fullId = `${providerId}/${model.id}`;
if (data.some((d) => d.id === fullId)) continue;
data.push({
id: fullId,
object: "model",
created: timestamp,
owned_by: providerId,
type: "image",
supported_sizes: null,
});
}
}
);
} catch {}
return new Response(JSON.stringify({ object: "list", data }), {
headers: { "Content-Type": "application/json" },
});
}
/**
@@ -85,7 +107,30 @@ export async function POST(request) {
if (policy.rejection) return policy.rejection;
// Parse model to get provider
const { provider } = parseImageModel(body.model);
let { provider } = parseImageModel(body.model);
let isCustomModel = false;
// If not in built-in registry, check custom models tagged for images
if (!provider) {
try {
const customModelsMap = (await getAllCustomModels()) as Record<string, any>;
for (const [providerId, models] of Object.entries(customModelsMap)) {
if (!Array.isArray(models)) continue;
for (const model of models) {
if (!model?.id || !Array.isArray(model.supportedEndpoints)) continue;
if (!model.supportedEndpoints.includes("images")) continue;
const fullId = `${providerId}/${model.id}`;
if (fullId === body.model) {
provider = providerId;
isCustomModel = true;
break;
}
}
if (provider) break;
}
} catch {}
}
if (!provider) {
return errorResponse(
HTTP_STATUS.BAD_REQUEST,
@@ -106,9 +151,23 @@ export async function POST(request) {
`No credentials for image provider: ${provider}`
);
}
} else if (isCustomModel) {
// Custom models need credentials from the provider connection
credentials = await getProviderCredentials(provider);
if (!credentials) {
return errorResponse(
HTTP_STATUS.BAD_REQUEST,
`No credentials for custom image provider: ${provider}`
);
}
}
const result = await handleImageGeneration({ body, credentials, log });
const result = await handleImageGeneration({
body,
credentials,
log,
...(isCustomModel && { resolvedProvider: provider }),
});
if (result.success) {
return new Response(JSON.stringify((result as any).data), {
+17
View File
@@ -364,6 +364,17 @@ export async function getUnifiedModelsResponse(
const aliasId = `${alias}/${modelId}`;
if (models.some((m) => m.id === aliasId)) continue;
// Determine type from supportedEndpoints
const endpoints = Array.isArray(model.supportedEndpoints)
? model.supportedEndpoints
: ["chat"];
const apiFormat =
typeof model.apiFormat === "string" ? model.apiFormat : "chat-completions";
let modelType: string | undefined;
if (endpoints.includes("embeddings")) modelType = "embedding";
else if (endpoints.includes("images")) modelType = "image";
else if (endpoints.includes("audio")) modelType = "audio";
models.push({
id: aliasId,
object: "model",
@@ -373,6 +384,11 @@ export async function getUnifiedModelsResponse(
root: modelId,
parent: null,
custom: true,
...(modelType ? { type: modelType } : {}),
...(apiFormat !== "chat-completions" ? { api_format: apiFormat } : {}),
...(endpoints.length > 1 || !endpoints.includes("chat")
? { supported_endpoints: endpoints }
: {}),
});
// Only add provider-prefixed version if different from alias
@@ -388,6 +404,7 @@ export async function getUnifiedModelsResponse(
root: modelId,
parent: aliasId,
custom: true,
...(modelType ? { type: modelType } : {}),
});
}
}
+3
View File
@@ -6,6 +6,9 @@
directives ensure all utility classes in route groups are included. */
@source "../app/(dashboard)";
@source "../../open-sse";
@source not "../../*.sqlite*";
@source not "../../.claude*";
@source not "../../.claude-memory";
@custom-variant dark (&:where(.dark, .dark *));
+264
View File
@@ -0,0 +1,264 @@
/**
* Quota Cache Domain Layer
*
* In-memory cache of provider quota data per connectionId.
* Populated by:
* - Dashboard usage endpoint (GET /api/usage/[connectionId])
* - 429 responses marking account as exhausted
*
* Background refresh ticks every 1 minute and refetches entries that are due:
* - Active accounts: once the cached entry is at least 5 minutes old
* - Exhausted accounts: once the cached entry is at least 5 minutes old, or on the first tick after resetAt has passed
*
* @module domain/quotaCache
*/
import { getUsageForProvider } from "@omniroute/open-sse/services/usage.ts";
import { getProviderConnectionById, resolveProxyForConnection } from "@/lib/localDb";
import { runWithProxyContext } from "@omniroute/open-sse/utils/proxyFetch.ts";
import { safePercentage } from "@/shared/utils/formatting";
// ─── Types ──────────────────────────────────────────────────────────────────
/** Normalized state of a single quota bucket, as stored in the cache. */
interface QuotaInfo {
/** Remaining quota as a percentage; a value <= 0 counts as used up. */
remainingPercentage: number;
/** Reset time as a date string, or null when none was reported. */
resetAt: string | null;
}
/** One cached quota snapshot; the cache holds one entry per connectionId. */
interface QuotaCacheEntry {
/** Connection this entry belongs to (also the cache key). */
connectionId: string;
/** Provider identifier supplied by whoever stored the entry. */
provider: string;
/** Normalized quota buckets keyed by quota name; empty for 429-sourced entries. */
quotas: Record<string, QuotaInfo>;
/** `Date.now()` timestamp (ms) at which the entry was stored. */
fetchedAt: number;
/** True when every bucket is used up, or when the entry was created from a 429. */
exhausted: boolean;
/** Earliest parseable resetAt among the buckets when exhausted; otherwise null. */
nextResetAt: string | null;
}
// ─── Constants ──────────────────────────────────────────────────────────────
// Age at which a non-exhausted entry becomes due for a background refetch.
const ACTIVE_TTL_MS = 5 * 60 * 1000; // 5 minutes for active accounts
// Age after which an exhausted entry that has no nextResetAt stops being reported as exhausted.
const EXHAUSTED_TTL_MS = 5 * 60 * 1000; // 5 minutes for 429-sourced entries (no resetAt)
// Age at which an exhausted entry becomes due for a background refetch.
const EXHAUSTED_REFRESH_MS = 5 * 60 * 1000; // 5 minutes: recheck exhausted accounts (aligned with TTL)
// Period of the background timer that looks for entries due for refresh.
const REFRESH_INTERVAL_MS = 60 * 1000; // Background tick every 1 minute
// ─── State ──────────────────────────────────────────────────────────────────
// In-memory store of quota snapshots, keyed by connectionId.
const cache = new Map<string, QuotaCacheEntry>();
// Number of entries refreshed in parallel per batch during a background tick.
const MAX_CONCURRENT_REFRESHES = 5;
// Handle of the background interval; null while the refresh loop is stopped.
let refreshTimer: ReturnType<typeof setInterval> | null = null;
// Guard so a new tick is skipped while the previous one is still running.
let tickRunning = false;
// ─── Helpers ────────────────────────────────────────────────────────────────
/**
 * Decide whether a set of quota buckets is fully used up.
 *
 * @param quotas - Normalized quota buckets.
 * @returns `true` only when there is at least one bucket and every bucket has
 *          `remainingPercentage <= 0`; an empty map is never exhausted.
 */
function isExhausted(quotas: Record<string, QuotaInfo>): boolean {
  const buckets = Object.values(quotas);
  if (buckets.length === 0) {
    return false;
  }
  for (const bucket of buckets) {
    // Negated comparison keeps NaN classified as "not exhausted".
    if (!(bucket.remainingPercentage <= 0)) {
      return false;
    }
  }
  return true;
}
/**
 * Convert a date string into epoch milliseconds.
 *
 * @param value - Date string to parse.
 * @returns The timestamp in milliseconds, or `null` when the string does not
 *          parse to a valid date.
 */
function parseDate(value: string): number | null {
  const epochMs = new Date(value).getTime();
  if (Number.isNaN(epochMs)) {
    return null;
  }
  return epochMs;
}
/**
 * Find the soonest reset time among a set of quota buckets.
 *
 * Buckets without a `resetAt`, or whose `resetAt` does not parse, are ignored.
 *
 * @param quotas - Normalized quota buckets.
 * @returns The original `resetAt` string with the smallest timestamp, or
 *          `null` when no bucket has a parseable reset time.
 */
function earliestResetAt(quotas: Record<string, QuotaInfo>): string | null {
  let soonest: string | null = null;
  let soonestMs = Infinity;
  for (const bucket of Object.values(quotas)) {
    const candidate = bucket.resetAt;
    if (!candidate) continue;
    const candidateMs = parseDate(candidate);
    if (candidateMs === null || candidateMs >= soonestMs) continue;
    soonestMs = candidateMs;
    soonest = candidate;
  }
  return soonest;
}
/**
 * Convert raw provider quota objects into the cache's `QuotaInfo` shape.
 *
 * For each object-valued entry the remaining percentage is taken from
 * `remainingPercentage` when it is a finite number; otherwise it is derived
 * from `total` and `used`. Entries that are not objects are dropped.
 *
 * When neither a usable `remainingPercentage` nor a positive finite `total` is
 * present, the quota is unknown or unlimited and is reported as 100% rather
 * than 0%. Reporting 0% would mark such accounts as exhausted, contradicting
 * the module's "unknown = assume available" rule; a genuinely exhausted
 * account is still caught by the 429 path.
 *
 * @param rawQuotas - Quota objects keyed by quota name, as returned by the provider usage call.
 * @returns Normalized buckets keyed by the same names.
 */
function normalizeQuotas(rawQuotas: Record<string, any>): Record<string, QuotaInfo> {
  const result: Record<string, QuotaInfo> = {};
  for (const [key, q] of Object.entries(rawQuotas)) {
    if (!q || typeof q !== "object") continue;

    let remainingPercentage = safePercentage(q.remainingPercentage);
    if (remainingPercentage === undefined) {
      const total = Number(q.total);
      // A missing or non-numeric `used` counts as nothing consumed instead of producing NaN.
      const used = Number(q.used) || 0;
      remainingPercentage =
        Number.isFinite(total) && total > 0
          ? Math.round(((total - used) / total) * 100)
          : 100; // no usable total: unknown/unlimited, assume available
    }

    result[key] = {
      remainingPercentage,
      resetAt: q.resetAt || null,
    };
  }
  return result;
}
// ─── Public API ─────────────────────────────────────────────────────────────
/**
 * Normalize and cache quota data for a connection, replacing any previous
 * entry. Called by the usage endpoint and by the background refresh.
 *
 * @param connectionId - Cache key.
 * @param provider - Provider identifier stored alongside the quotas.
 * @param rawQuotas - Raw quota objects keyed by quota name.
 */
export function setQuotaCache(
  connectionId: string,
  provider: string,
  rawQuotas: Record<string, any>
) {
  const normalized = normalizeQuotas(rawQuotas);
  const allUsedUp = isExhausted(normalized);
  const entry: QuotaCacheEntry = {
    connectionId,
    provider,
    quotas: normalized,
    fetchedAt: Date.now(),
    exhausted: allUsedUp,
    // A reset time is only tracked while the account is exhausted.
    nextResetAt: allUsedUp ? earliestResetAt(normalized) : null,
  };
  cache.set(connectionId, entry);
}
/**
 * Look up the cached quota entry for a connection.
 *
 * @param connectionId - Cache key.
 * @returns The cached entry, or `null` when nothing is cached for it.
 */
export function getQuotaCache(connectionId: string): QuotaCacheEntry | null {
  const cached = cache.get(connectionId);
  return cached ? cached : null;
}
/**
 * Tell whether cached data says an account has no quota left.
 *
 * Returns `false` when there is no cache entry (unknown = assume available)
 * or the entry is not flagged exhausted. For an exhausted entry:
 *  - with a `nextResetAt`: it stays exhausted until that time has passed;
 *  - without one: it stays exhausted until the entry is older than
 *    `EXHAUSTED_TTL_MS`.
 *
 * @param connectionId - Cache key.
 */
export function isAccountQuotaExhausted(connectionId: string): boolean {
  const entry = cache.get(connectionId);
  if (!entry || !entry.exhausted) {
    return false;
  }

  const now = Date.now();
  if (entry.nextResetAt) {
    // Once the reset time has passed, assume available until a refresh confirms.
    const resetMs = parseDate(entry.nextResetAt);
    return resetMs === null || resetMs > now;
  }

  // No known reset time: the exhausted flag expires after a fixed TTL.
  return now - entry.fetchedAt <= EXHAUSTED_TTL_MS;
}
/**
 * Record an account as exhausted after a 429 response, when no quota data is
 * available. The entry carries no buckets and no reset time, so the exhausted
 * flag lapses via the fixed 5-minute TTL.
 *
 * @param connectionId - Cache key.
 * @param provider - Provider identifier stored with the entry.
 */
export function markAccountExhaustedFrom429(connectionId: string, provider: string) {
  const placeholder: QuotaCacheEntry = {
    connectionId,
    provider,
    quotas: {},
    fetchedAt: Date.now(),
    exhausted: true,
    nextResetAt: null,
  };
  cache.set(connectionId, placeholder);
}
// ─── Background Refresh ─────────────────────────────────────────────────────
// Connection ids with a refresh currently in flight, so the same id is never fetched twice at once.
const refreshingSet = new Set<string>();

/**
 * Refetch usage for one cached entry and store the result.
 *
 * The entry is removed from the cache when its connection no longer exists,
 * is not an OAuth connection, or is inactive. Fetch errors are logged and
 * leave the existing entry untouched.
 *
 * @param entry - Cache entry to refresh.
 */
async function refreshEntry(entry: QuotaCacheEntry) {
  const id = entry.connectionId;
  if (refreshingSet.has(id)) {
    return;
  }
  refreshingSet.add(id);

  try {
    const connection = await getProviderConnectionById(id);
    if (!connection || connection.authType !== "oauth" || !connection.isActive) {
      cache.delete(id);
      return;
    }

    const proxyInfo = await resolveProxyForConnection(id);
    const fetchUsage = () => getUsageForProvider(connection);
    const usage = await runWithProxyContext(proxyInfo?.proxy || null, fetchUsage);

    if (usage?.quotas) {
      setQuotaCache(id, entry.provider, usage.quotas);
    }
  } catch (err) {
    const detail = (err as any)?.message || err;
    console.warn(`[QuotaCache] Refresh failed for ${entry.connectionId.slice(0, 8)}:`, detail);
  } finally {
    refreshingSet.delete(id);
  }
}
/**
 * Decide whether a cache entry is due for a background refetch.
 *
 * @param entry - Cache entry to inspect.
 * @param now - Current time in epoch milliseconds.
 * @returns `true` when an active entry has reached `ACTIVE_TTL_MS`, or when an
 *          exhausted entry has passed its reset time or reached
 *          `EXHAUSTED_REFRESH_MS`.
 */
function needsRefresh(entry: QuotaCacheEntry, now: number): boolean {
  const age = now - entry.fetchedAt;
  if (!entry.exhausted) {
    return age >= ACTIVE_TTL_MS;
  }

  const resetMs = entry.nextResetAt ? parseDate(entry.nextResetAt) : null;
  if (resetMs !== null && resetMs <= now) {
    return true;
  }
  return age >= EXHAUSTED_REFRESH_MS;
}
/**
 * One pass of the background refresh: collect the entries that are due and
 * refresh them in batches of `MAX_CONCURRENT_REFRESHES`. A pass that starts
 * while the previous one is still running does nothing.
 */
async function backgroundRefreshTick() {
  if (tickRunning) {
    return;
  }
  tickRunning = true;

  try {
    const now = Date.now();
    const due = Array.from(cache.values()).filter((entry) => needsRefresh(entry, now));

    // Work through the due entries one batch at a time to avoid a thundering herd.
    let offset = 0;
    while (offset < due.length) {
      const chunk = due.slice(offset, offset + MAX_CONCURRENT_REFRESHES);
      await Promise.allSettled(chunk.map((entry) => refreshEntry(entry)));
      offset += MAX_CONCURRENT_REFRESHES;
    }
  } finally {
    tickRunning = false;
  }
}
/**
 * Start the periodic background refresh. Calling it again while the timer is
 * already running has no effect.
 */
export function startBackgroundRefresh() {
  if (!refreshTimer) {
    refreshTimer = setInterval(backgroundRefreshTick, REFRESH_INTERVAL_MS);
    // Call unref when the timer handle provides it.
    refreshTimer?.unref?.();
  }
}
/**
 * Stop the periodic background refresh. Does nothing when it is not running.
 */
export function stopBackgroundRefresh() {
  if (!refreshTimer) {
    return;
  }
  clearInterval(refreshTimer);
  refreshTimer = null;
}
/**
 * Summarize the cache for debugging or dashboard display.
 *
 * @returns The number of cached entries and, per entry, a truncated
 *          connection id (first 8 characters plus "..."), the provider, the
 *          exhausted flag, the next reset time and the entry age in ms.
 */
export function getQuotaCacheStats() {
  type EntryStats = {
    connectionId: string;
    provider: string;
    exhausted: boolean;
    nextResetAt: string | null;
    ageMs: number;
  };

  const entries: EntryStats[] = Array.from(cache.values(), (cached) => ({
    connectionId: cached.connectionId.slice(0, 8) + "...",
    provider: cached.provider,
    exhausted: cached.exhausted,
    nextResetAt: cached.nextResetAt,
    ageMs: Date.now() - cached.fetchedAt,
  }));

  return { total: cache.size, entries };
}
+18 -6
View File
@@ -8,22 +8,27 @@
* @see https://nextjs.org/docs/app/building-your-application/optimizing/instrumentation
*/
function ensureJwtSecret(): void {
function ensureSecrets(): void {
// eslint-disable-next-line no-eval
const crypto = eval("require")("crypto");
if (!process.env.JWT_SECRET || process.env.JWT_SECRET.trim() === "") {
// Use eval to hide require from webpack's static analysis
// This code only runs in Node.js runtime (guarded by NEXT_RUNTIME check)
// eslint-disable-next-line no-eval
const crypto = eval("require")("crypto");
const generated = crypto.randomBytes(48).toString("base64");
process.env.JWT_SECRET = generated;
console.log("[STARTUP] JWT_SECRET auto-generated (random 64-char secret)");
}
if (!process.env.API_KEY_SECRET || process.env.API_KEY_SECRET.trim() === "") {
const generated = crypto.randomBytes(32).toString("hex");
process.env.API_KEY_SECRET = generated;
console.log("[STARTUP] API_KEY_SECRET auto-generated (random 64-char hex secret)");
}
}
export async function register() {
// Only run on the server (not during build or in Edge runtime)
if (process.env.NEXT_RUNTIME === "nodejs") {
ensureJwtSecret();
ensureSecrets();
// Console log file capture (must be first — before any logging occurs)
const { initConsoleInterceptor } = await import("@/lib/consoleInterceptor");
initConsoleInterceptor();
@@ -34,6 +39,13 @@ export async function register() {
const { initApiBridgeServer } = await import("@/lib/apiBridgeServer");
initApiBridgeServer();
// Quota cache: start background refresh for quota-aware account selection
// Dynamic import required — quotaCache depends on better-sqlite3 (Node-only),
// and instrumentation.ts is bundled for all runtimes including Edge.
const { startBackgroundRefresh } = await import("@/domain/quotaCache");
startBackgroundRefresh();
console.log("[STARTUP] Quota cache background refresh started");
// Compliance: Initialize audit_log table + cleanup expired logs
try {
const { initAuditLog, cleanupExpiredLogs } = await import("@/lib/compliance/index");
+8 -2
View File
@@ -79,6 +79,7 @@ const SCHEMA_SQL = `
token_type TEXT,
consecutive_use_count INTEGER DEFAULT 0,
rate_limit_protection INTEGER DEFAULT 0,
last_used_at TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
@@ -311,6 +312,10 @@ function ensureProviderConnectionsColumns(db: SqliteDatabase) {
);
console.log("[DB] Added provider_connections.rate_limit_protection column");
}
if (!columnNames.has("last_used_at")) {
db.exec("ALTER TABLE provider_connections ADD COLUMN last_used_at TEXT");
console.log("[DB] Added provider_connections.last_used_at column");
}
} catch (error: unknown) {
const message = error instanceof Error ? error.message : String(error);
console.warn("[DB] Failed to verify provider_connections schema:", message);
@@ -483,7 +488,7 @@ function migrateFromJson(db: SqliteDatabase, jsonPath: string) {
rate_limited_until, health_check_interval, last_health_check_at,
last_tested, api_key, id_token, provider_specific_data,
expires_in, display_name, global_priority, default_model,
token_type, consecutive_use_count, rate_limit_protection, created_at, updated_at
token_type, consecutive_use_count, rate_limit_protection, last_used_at, created_at, updated_at
) VALUES (
@id, @provider, @authType, @name, @email, @priority, @isActive,
@accessToken, @refreshToken, @expiresAt, @tokenExpiresAt,
@@ -492,7 +497,7 @@ function migrateFromJson(db: SqliteDatabase, jsonPath: string) {
@rateLimitedUntil, @healthCheckInterval, @lastHealthCheckAt,
@lastTested, @apiKey, @idToken, @providerSpecificData,
@expiresIn, @displayName, @globalPriority, @defaultModel,
@tokenType, @consecutiveUseCount, @rateLimitProtection, @createdAt, @updatedAt
@tokenType, @consecutiveUseCount, @rateLimitProtection, @lastUsedAt, @createdAt, @updatedAt
)
`);
@@ -533,6 +538,7 @@ function migrateFromJson(db: SqliteDatabase, jsonPath: string) {
defaultModel: conn.defaultModel || null,
tokenType: conn.tokenType || null,
consecutiveUseCount: conn.consecutiveUseCount || 0,
lastUsedAt: conn.lastUsedAt || null,
rateLimitProtection:
conn.rateLimitProtection === true || conn.rateLimitProtection === 1 ? 1 : 0,
createdAt: conn.createdAt || new Date().toISOString(),
+15 -2
View File
@@ -115,7 +115,14 @@ export async function getAllCustomModels() {
return result;
}
export async function addCustomModel(providerId, modelId, modelName, source = "manual") {
export async function addCustomModel(
providerId: string,
modelId: string,
modelName?: string,
source = "manual",
apiFormat: "chat-completions" | "responses" = "chat-completions",
supportedEndpoints: string[] = ["chat"]
) {
const db = getDbInstance();
const row = db
.prepare("SELECT value FROM key_value WHERE namespace = 'customModels' AND key = ?")
@@ -126,7 +133,13 @@ export async function addCustomModel(providerId, modelId, modelName, source = "m
const exists = models.find((m) => m.id === modelId);
if (exists) return exists;
const model = { id: modelId, name: modelName || modelId, source };
const model = {
id: modelId,
name: modelName || modelId,
source,
apiFormat,
supportedEndpoints,
};
models.push(model);
db.prepare(
"INSERT OR REPLACE INTO key_value (namespace, key, value) VALUES ('customModels', ?, ?)"
+5 -2
View File
@@ -217,7 +217,7 @@ function _insertConnectionRow(db: DbLike, conn: JsonRecord) {
rate_limited_until, health_check_interval, last_health_check_at,
last_tested, api_key, id_token, provider_specific_data,
expires_in, display_name, global_priority, default_model,
token_type, consecutive_use_count, rate_limit_protection, created_at, updated_at
token_type, consecutive_use_count, rate_limit_protection, last_used_at, created_at, updated_at
) VALUES (
@id, @provider, @authType, @name, @email, @priority, @isActive,
@accessToken, @refreshToken, @expiresAt, @tokenExpiresAt,
@@ -226,7 +226,7 @@ function _insertConnectionRow(db: DbLike, conn: JsonRecord) {
@rateLimitedUntil, @healthCheckInterval, @lastHealthCheckAt,
@lastTested, @apiKey, @idToken, @providerSpecificData,
@expiresIn, @displayName, @globalPriority, @defaultModel,
@tokenType, @consecutiveUseCount, @rateLimitProtection, @createdAt, @updatedAt
@tokenType, @consecutiveUseCount, @rateLimitProtection, @lastUsedAt, @createdAt, @updatedAt
)
`
).run({
@@ -267,6 +267,7 @@ function _insertConnectionRow(db: DbLike, conn: JsonRecord) {
consecutiveUseCount: conn.consecutiveUseCount || 0,
rateLimitProtection:
conn.rateLimitProtection === true || conn.rateLimitProtection === 1 ? 1 : 0,
lastUsedAt: conn.lastUsedAt || null,
createdAt: conn.createdAt,
updatedAt: conn.updatedAt,
});
@@ -290,6 +291,7 @@ function _updateConnectionRow(db: DbLike, id: string, data: JsonRecord) {
default_model = @defaultModel, token_type = @tokenType,
consecutive_use_count = @consecutiveUseCount,
rate_limit_protection = @rateLimitProtection,
last_used_at = @lastUsedAt,
updated_at = @updatedAt
WHERE id = @id
`
@@ -331,6 +333,7 @@ function _updateConnectionRow(db: DbLike, id: string, data: JsonRecord) {
consecutiveUseCount: data.consecutiveUseCount || 0,
rateLimitProtection:
data.rateLimitProtection === true || data.rateLimitProtection === 1 ? 1 : 0,
lastUsedAt: data.lastUsedAt || null,
updatedAt: now,
});
}
+20 -2
View File
@@ -307,11 +307,29 @@ async function validateOpenAICompatibleProvider({ apiKey, providerSpecificData =
if (chatRes.status >= 500) {
return { valid: false, error: `Provider unavailable (${chatRes.status})` };
}
} catch {
// Chat test also failed — fall through to simple connectivity check
}
// Step 3: Final fallback — simple connectivity check
// For local providers (Ollama, LM Studio, etc.) that may not respond to
// standard OpenAI endpoints but are still reachable
try {
const pingRes = await fetch(baseUrl, {
method: "GET",
headers: buildBearerHeaders(apiKey),
signal: AbortSignal.timeout(5000),
});
// If the server responds at all (even with an error page), it's reachable
if (pingRes.status < 500) {
return { valid: true, error: null };
}
return { valid: false, error: `Provider unavailable (${pingRes.status})` };
} catch (error: any) {
return { valid: false, error: error.message || "Connection failed" };
}
return { valid: false, error: "Validation failed" };
}
async function validateAnthropicCompatibleProvider({ apiKey, providerSpecificData = {} }: any) {
+1
View File
@@ -37,6 +37,7 @@ export const comboNodeSchema = z.object({
export const comboSchema = z.object({
name: z.string().min(1, "Combo name is required").max(100),
model: z.string().min(1, "Model pattern is required"),
endpoint: z.enum(["chat", "embeddings", "images"]).default("chat"),
strategy: z
.enum(["priority", "weighted", "round-robin", "random", "least-used", "cost-optimized"])
.default("priority"),
+8
View File
@@ -148,3 +148,11 @@ export function truncateUrl(url, max = 50) {
return url.length > max ? url.slice(0, max) + "…" : url;
}
}
/**
 * Pass a value through only when it is a finite number.
 *
 * Anything whose type is not `number`, as well as `NaN` and `±Infinity`,
 * yields `undefined`. The value is neither clamped nor coerced from strings.
 * Shared by quota normalization in the backend (quotaCache) and the
 * frontend (ProviderLimits).
 *
 * @param value - Arbitrary input to inspect.
 * @returns The same number when it is finite, otherwise `undefined`.
 */
export function safePercentage(value: unknown): number | undefined {
  if (typeof value !== "number") {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
-912
View File
@@ -1,912 +0,0 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.dbBackupRestoreSchema =
exports.testComboSchema =
exports.updateComboSchema =
exports.cloudSyncActionSchema =
exports.cloudModelAliasUpdateSchema =
exports.cloudResolveAliasSchema =
exports.cloudCredentialUpdateSchema =
exports.kiroSocialExchangeSchema =
exports.kiroImportSchema =
exports.cursorImportSchema =
exports.oauthPollSchema =
exports.oauthExchangeSchema =
exports.translatorTranslateSchema =
exports.translatorSendSchema =
exports.translatorSaveSchema =
exports.translatorDetectSchema =
exports.testProxySchema =
exports.updateProxyConfigSchema =
exports.removeModelAliasSchema =
exports.addModelAliasSchema =
exports.updateModelAliasesSchema =
exports.updateIpFilterSchema =
exports.updateThinkingBudgetSchema =
exports.updateSystemPromptSchema =
exports.updateRequireLoginSchema =
exports.updateComboDefaultsSchema =
exports.resetStatsActionSchema =
exports.jsonObjectSchema =
exports.updateResilienceSchema =
exports.toggleRateLimitSchema =
exports.updatePricingSchema =
exports.providerModelMutationSchema =
exports.clearModelAvailabilitySchema =
exports.updateModelAliasSchema =
exports.removeFallbackSchema =
exports.registerFallbackSchema =
exports.policyActionSchema =
exports.setBudgetSchema =
exports.v1CountTokensSchema =
exports.providerChatCompletionSchema =
exports.v1RerankSchema =
exports.v1ModerationSchema =
exports.v1AudioSpeechSchema =
exports.v1ImageGenerationSchema =
exports.v1EmbeddingsSchema =
exports.loginSchema =
exports.updateSettingsSchema =
exports.createComboSchema =
exports.createKeySchema =
exports.createProviderSchema =
void 0;
exports.guideSettingsSaveSchema =
exports.codexProfileIdSchema =
exports.codexProfileNameSchema =
exports.cliModelConfigSchema =
exports.cliSettingsEnvSchema =
exports.cliBackupMutationSchema =
exports.cliMitmAliasUpdateSchema =
exports.cliMitmStopSchema =
exports.cliMitmStartSchema =
exports.v1betaGeminiGenerateSchema =
exports.validateProviderApiKeySchema =
exports.providersBatchTestSchema =
exports.updateProviderConnectionSchema =
exports.providerNodeValidateSchema =
exports.updateProviderNodeSchema =
exports.createProviderNodeSchema =
exports.updateKeyPermissionsSchema =
exports.evalRunSuiteSchema =
void 0;
exports.validateBody = validateBody;
var zod_1 = require("zod");
// ──── Provider Schemas ────
exports.createProviderSchema = zod_1.z.object({
provider: zod_1.z.string().min(1).max(100),
apiKey: zod_1.z.string().min(1).max(10000),
name: zod_1.z.string().min(1).max(200),
priority: zod_1.z.number().int().min(1).max(100).optional(),
globalPriority: zod_1.z.number().int().min(1).max(100).nullable().optional(),
defaultModel: zod_1.z.string().max(200).nullable().optional(),
testStatus: zod_1.z.string().max(50).optional(),
});
// ──── API Key Schemas ────
exports.createKeySchema = zod_1.z.object({
name: zod_1.z.string().min(1, "Name is required").max(200),
});
// ──── Combo Schemas ────
// A model entry can be a plain string (legacy) or an object with weight
var comboModelEntry = zod_1.z.union([
zod_1.z.string(),
zod_1.z.object({
model: zod_1.z.string().min(1),
weight: zod_1.z.number().min(0).max(100).default(0),
}),
]);
// Per-combo config overrides
var comboConfigSchema = zod_1.z
.object({
maxRetries: zod_1.z.number().int().min(0).max(10).optional(),
retryDelayMs: zod_1.z.number().int().min(0).max(60000).optional(),
timeoutMs: zod_1.z.number().int().min(1000).max(600000).optional(),
healthCheckEnabled: zod_1.z.boolean().optional(),
})
.optional();
var comboStrategySchema = zod_1.z.enum([
"priority",
"weighted",
"round-robin",
"random",
"least-used",
"cost-optimized",
]);
var comboRuntimeConfigSchema = zod_1.z
.object({
strategy: comboStrategySchema.optional(),
maxRetries: zod_1.z.coerce.number().int().min(0).max(10).optional(),
retryDelayMs: zod_1.z.coerce.number().int().min(0).max(60000).optional(),
timeoutMs: zod_1.z.coerce.number().int().min(1000).max(600000).optional(),
concurrencyPerModel: zod_1.z.coerce.number().int().min(1).max(20).optional(),
queueTimeoutMs: zod_1.z.coerce.number().int().min(1000).max(120000).optional(),
healthCheckEnabled: zod_1.z.boolean().optional(),
healthCheckTimeoutMs: zod_1.z.coerce.number().int().min(100).max(30000).optional(),
maxComboDepth: zod_1.z.coerce.number().int().min(1).max(10).optional(),
trackMetrics: zod_1.z.boolean().optional(),
})
.strict();
exports.createComboSchema = zod_1.z.object({
name: zod_1.z
.string()
.min(1, "Name is required")
.max(100)
.regex(/^[a-zA-Z0-9_/.-]+$/, "Name can only contain letters, numbers, -, _, / and ."),
models: zod_1.z.array(comboModelEntry).optional().default([]),
strategy: comboStrategySchema.optional().default("priority"),
config: comboConfigSchema,
});
// ──── Settings Schemas ────
// FASE-01: Removed .passthrough() — only explicitly listed fields are accepted
exports.updateSettingsSchema = zod_1.z.object({
newPassword: zod_1.z.string().min(1).max(200).optional(),
currentPassword: zod_1.z.string().max(200).optional(),
theme: zod_1.z.string().max(50).optional(),
language: zod_1.z.string().max(10).optional(),
requireLogin: zod_1.z.boolean().optional(),
enableRequestLogs: zod_1.z.boolean().optional(),
enableSocks5Proxy: zod_1.z.boolean().optional(),
instanceName: zod_1.z.string().max(100).optional(),
corsOrigins: zod_1.z.string().max(500).optional(),
logRetentionDays: zod_1.z.number().int().min(1).max(365).optional(),
cloudUrl: zod_1.z.string().max(500).optional(),
baseUrl: zod_1.z.string().max(500).optional(),
setupComplete: zod_1.z.boolean().optional(),
requireAuthForModels: zod_1.z.boolean().optional(),
blockedProviders: zod_1.z.array(zod_1.z.string().max(100)).optional(),
hideHealthCheckLogs: zod_1.z.boolean().optional(),
// Routing settings (#134)
fallbackStrategy: zod_1.z
.enum(["fill-first", "round-robin", "p2c", "random", "least-used", "cost-optimized"])
.optional(),
wildcardAliases: zod_1.z
.array(zod_1.z.object({ pattern: zod_1.z.string(), target: zod_1.z.string() }))
.optional(),
stickyRoundRobinLimit: zod_1.z.number().int().min(0).max(1000).optional(),
});
// ──── Auth Schemas ────
exports.loginSchema = zod_1.z.object({
password: zod_1.z.string().min(1, "Password is required").max(200),
});
// ──── API Route Payload Schemas (T06) ────
var modelIdSchema = zod_1.z.string().trim().min(1, "Model is required").max(200);
var nonEmptyStringSchema = zod_1.z.string().trim().min(1, "Field is required");
var embeddingTokenArraySchema = zod_1.z
.array(zod_1.z.number().int().min(0))
.min(1, "input token array must contain at least one item");
var embeddingInputSchema = zod_1.z.union([
nonEmptyStringSchema,
zod_1.z.array(nonEmptyStringSchema).min(1, "input must contain at least one item"),
embeddingTokenArraySchema,
zod_1.z.array(embeddingTokenArraySchema).min(1, "input must contain at least one item"),
]);
var chatMessageSchema = zod_1.z
.object({
role: zod_1.z.string().trim().min(1, "messages[].role is required"),
content: zod_1.z
.union([nonEmptyStringSchema, zod_1.z.array(zod_1.z.unknown()).min(1), zod_1.z.null()])
.optional(),
})
.catchall(zod_1.z.unknown());
var countTokensMessageSchema = zod_1.z
.object({
content: zod_1.z.union([
nonEmptyStringSchema,
zod_1.z
.array(
zod_1.z
.object({
type: zod_1.z.string().optional(),
text: zod_1.z.string().optional(),
})
.catchall(zod_1.z.unknown())
)
.min(1, "messages[].content must contain at least one item"),
]),
})
.catchall(zod_1.z.unknown());
exports.v1EmbeddingsSchema = zod_1.z
.object({
model: modelIdSchema,
input: embeddingInputSchema,
dimensions: zod_1.z.coerce.number().int().positive().optional(),
encoding_format: zod_1.z.enum(["float", "base64"]).optional(),
})
.catchall(zod_1.z.unknown());
exports.v1ImageGenerationSchema = zod_1.z
.object({
model: modelIdSchema,
prompt: nonEmptyStringSchema,
})
.catchall(zod_1.z.unknown());
exports.v1AudioSpeechSchema = zod_1.z
.object({
model: modelIdSchema,
input: nonEmptyStringSchema,
})
.catchall(zod_1.z.unknown());
exports.v1ModerationSchema = zod_1.z
.object({
model: modelIdSchema.optional(),
input: zod_1.z.unknown().refine(function (value) {
if (value === undefined || value === null) return false;
if (typeof value === "string") return value.trim().length > 0;
if (Array.isArray(value)) return value.length > 0;
return true;
}, "Input is required"),
})
.catchall(zod_1.z.unknown());
exports.v1RerankSchema = zod_1.z
.object({
model: modelIdSchema,
query: nonEmptyStringSchema,
documents: zod_1.z.array(zod_1.z.unknown()).min(1, "documents must contain at least one item"),
})
.catchall(zod_1.z.unknown());
exports.providerChatCompletionSchema = zod_1.z
.object({
model: modelIdSchema,
messages: zod_1.z.array(chatMessageSchema).min(1).optional(),
input: zod_1.z
.union([nonEmptyStringSchema, zod_1.z.array(zod_1.z.unknown()).min(1)])
.optional(),
prompt: nonEmptyStringSchema.optional(),
})
.catchall(zod_1.z.unknown())
.superRefine(function (value, ctx) {
if (value.messages === undefined && value.input === undefined && value.prompt === undefined) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "messages, input or prompt is required",
path: [],
});
}
});
exports.v1CountTokensSchema = zod_1.z
.object({
messages: zod_1.z
.array(countTokensMessageSchema)
.min(1, "messages must contain at least one item"),
})
.catchall(zod_1.z.unknown());
exports.setBudgetSchema = zod_1.z.object({
apiKeyId: zod_1.z.string().trim().min(1, "apiKeyId is required"),
dailyLimitUsd: zod_1.z.coerce.number().positive("dailyLimitUsd must be greater than zero"),
monthlyLimitUsd: zod_1.z.coerce
.number()
.positive("monthlyLimitUsd must be greater than zero")
.optional(),
warningThreshold: zod_1.z.coerce.number().min(0).max(1).optional(),
});
exports.policyActionSchema = zod_1.z
.object({
action: zod_1.z.enum(["unlock"]),
identifier: zod_1.z.string().trim().min(1).optional(),
})
.superRefine(function (value, ctx) {
if (value.action === "unlock" && !value.identifier) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "identifier is required for unlock action",
path: ["identifier"],
});
}
});
var fallbackChainEntrySchema = zod_1.z
.object({
provider: zod_1.z.string().trim().min(1, "provider is required"),
priority: zod_1.z.number().int().min(1).max(100).optional(),
enabled: zod_1.z.boolean().optional(),
})
.catchall(zod_1.z.unknown());
exports.registerFallbackSchema = zod_1.z.object({
model: modelIdSchema,
chain: zod_1.z.array(fallbackChainEntrySchema).min(1, "chain must contain at least one provider"),
});
exports.removeFallbackSchema = zod_1.z.object({
model: modelIdSchema,
});
exports.updateModelAliasSchema = zod_1.z.object({
model: modelIdSchema,
alias: zod_1.z.string().trim().min(1, "Alias is required").max(200),
});
exports.clearModelAvailabilitySchema = zod_1.z.object({
provider: zod_1.z.string().trim().min(1, "provider is required").max(120),
model: modelIdSchema,
});
exports.providerModelMutationSchema = zod_1.z.object({
provider: zod_1.z.string().trim().min(1, "provider is required").max(120),
modelId: zod_1.z.string().trim().min(1, "modelId is required").max(240),
modelName: zod_1.z.string().trim().max(240).optional(),
source: zod_1.z.string().trim().max(80).optional(),
});
var pricingFieldsSchema = zod_1.z
.object({
input: zod_1.z.number().min(0).optional(),
output: zod_1.z.number().min(0).optional(),
cached: zod_1.z.number().min(0).optional(),
reasoning: zod_1.z.number().min(0).optional(),
cache_creation: zod_1.z.number().min(0).optional(),
})
.strict();
exports.updatePricingSchema = zod_1.z.record(
zod_1.z.string().trim().min(1),
zod_1.z.record(zod_1.z.string().trim().min(1), pricingFieldsSchema)
);
exports.toggleRateLimitSchema = zod_1.z.object({
connectionId: zod_1.z.string().trim().min(1, "connectionId is required"),
enabled: zod_1.z.boolean(),
});
var resilienceProfileSchema = zod_1.z.object({
transientCooldown: zod_1.z.number().min(0),
rateLimitCooldown: zod_1.z.number().min(0),
maxBackoffLevel: zod_1.z.number().int().min(0),
circuitBreakerThreshold: zod_1.z.number().int().min(0),
circuitBreakerReset: zod_1.z.number().min(0),
});
var resilienceDefaultsSchema = zod_1.z
.object({
requestsPerMinute: zod_1.z.number().int().min(1).optional(),
minTimeBetweenRequests: zod_1.z.number().int().min(1).optional(),
concurrentRequests: zod_1.z.number().int().min(1).optional(),
})
.strict();
exports.updateResilienceSchema = zod_1.z
.object({
profiles: zod_1.z
.object({
oauth: resilienceProfileSchema.optional(),
apikey: resilienceProfileSchema.optional(),
})
.strict()
.optional(),
defaults: resilienceDefaultsSchema.optional(),
})
.superRefine(function (value, ctx) {
if (!value.profiles && !value.defaults) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "Must provide profiles or defaults",
path: [],
});
}
});
exports.jsonObjectSchema = zod_1.z.record(zod_1.z.string(), zod_1.z.unknown());
exports.resetStatsActionSchema = zod_1.z.object({
action: zod_1.z.literal("reset-stats"),
});
exports.updateComboDefaultsSchema = zod_1.z
.object({
comboDefaults: comboRuntimeConfigSchema.optional(),
providerOverrides: zod_1.z
.record(zod_1.z.string().trim().min(1), comboRuntimeConfigSchema)
.optional(),
})
.superRefine(function (value, ctx) {
if (!value.comboDefaults && !value.providerOverrides) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "Nothing to update",
path: [],
});
}
});
exports.updateRequireLoginSchema = zod_1.z
.object({
requireLogin: zod_1.z.boolean().optional(),
password: zod_1.z.string().min(4, "Password must be at least 4 characters").optional(),
})
.superRefine(function (value, ctx) {
if (value.requireLogin === undefined && !value.password) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
});
exports.updateSystemPromptSchema = zod_1.z
.object({
prompt: zod_1.z.string().max(50000).optional(),
enabled: zod_1.z.boolean().optional(),
})
.strict()
.superRefine(function (value, ctx) {
if (value.prompt === undefined && value.enabled === undefined) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
});
exports.updateThinkingBudgetSchema = zod_1.z
.object({
mode: zod_1.z.enum(["passthrough", "auto", "custom", "adaptive"]).optional(),
customBudget: zod_1.z.coerce.number().int().min(0).max(131072).optional(),
effortLevel: zod_1.z.enum(["none", "low", "medium", "high"]).optional(),
baseBudget: zod_1.z.coerce.number().int().min(0).max(131072).optional(),
complexityMultiplier: zod_1.z.coerce.number().min(0).optional(),
})
.strict()
.superRefine(function (value, ctx) {
if (
value.mode === undefined &&
value.customBudget === undefined &&
value.effortLevel === undefined &&
value.baseBudget === undefined &&
value.complexityMultiplier === undefined
) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
});
var ipFilterModeSchema = zod_1.z.enum(["blacklist", "whitelist"]);
var tempBanSchema = zod_1.z.object({
ip: zod_1.z.string().trim().min(1),
durationMs: zod_1.z.coerce.number().int().min(1).optional(),
reason: zod_1.z.string().max(200).optional(),
});
exports.updateIpFilterSchema = zod_1.z
.object({
enabled: zod_1.z.boolean().optional(),
mode: ipFilterModeSchema.optional(),
blacklist: zod_1.z.array(zod_1.z.string()).optional(),
whitelist: zod_1.z.array(zod_1.z.string()).optional(),
addBlacklist: zod_1.z.string().optional(),
removeBlacklist: zod_1.z.string().optional(),
addWhitelist: zod_1.z.string().optional(),
removeWhitelist: zod_1.z.string().optional(),
tempBan: tempBanSchema.optional(),
removeBan: zod_1.z.string().optional(),
})
.strict()
.superRefine(function (value, ctx) {
if (Object.keys(value).length === 0) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
});
exports.updateModelAliasesSchema = zod_1.z.object({
aliases: zod_1.z.record(zod_1.z.string().trim().min(1), zod_1.z.string().trim().min(1)),
});
exports.addModelAliasSchema = zod_1.z.object({
from: zod_1.z.string().trim().min(1),
to: zod_1.z.string().trim().min(1),
});
exports.removeModelAliasSchema = zod_1.z.object({
from: zod_1.z.string().trim().min(1),
});
var proxyConfigSchema = zod_1.z
.object({
type: zod_1.z
.preprocess(
function (value) {
return typeof value === "string" ? value.trim().toLowerCase() : value;
},
zod_1.z.enum(["http", "https", "socks5"])
)
.optional(),
host: zod_1.z.string().trim().min(1).optional(),
port: zod_1.z.coerce.number().int().min(1).max(65535).optional(),
username: zod_1.z.string().optional(),
password: zod_1.z.string().optional(),
})
.strict();
exports.updateProxyConfigSchema = zod_1.z
.object({
proxy: proxyConfigSchema.nullable().optional(),
global: proxyConfigSchema.nullable().optional(),
providers: zod_1.z
.record(zod_1.z.string().trim().min(1), proxyConfigSchema.nullable())
.optional(),
combos: zod_1.z.record(zod_1.z.string().trim().min(1), proxyConfigSchema.nullable()).optional(),
keys: zod_1.z.record(zod_1.z.string().trim().min(1), proxyConfigSchema.nullable()).optional(),
level: zod_1.z.enum(["global", "provider", "combo", "key"]).optional(),
id: zod_1.z.string().optional(),
})
.strict()
.superRefine(function (value, ctx) {
var _a;
var hasPayload =
value.proxy !== undefined ||
value.global !== undefined ||
value.providers !== undefined ||
value.combos !== undefined ||
value.keys !== undefined ||
value.level !== undefined;
if (!hasPayload) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
if (value.level !== undefined && value.proxy === undefined) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "proxy is required when level is provided",
path: ["proxy"],
});
}
if (
value.level &&
value.level !== "global" &&
!((_a = value.id) === null || _a === void 0 ? void 0 : _a.trim())
) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "id is required for provider/combo/key level updates",
path: ["id"],
});
}
});
exports.testProxySchema = zod_1.z.object({
proxy: zod_1.z.object({
type: zod_1.z.string().optional(),
host: zod_1.z.string().trim().min(1, "proxy.host is required"),
port: zod_1.z.union([zod_1.z.string(), zod_1.z.number()]),
username: zod_1.z.string().optional(),
password: zod_1.z.string().optional(),
}),
});
var jsonRecordSchema = zod_1.z.record(zod_1.z.string(), zod_1.z.unknown());
var nonEmptyJsonRecordSchema = jsonRecordSchema.refine(function (value) {
return Object.keys(value).length > 0;
}, "Body must be a non-empty object");
var translatorLogFileSchema = zod_1.z.enum([
"1_req_client.json",
"2_req_source.json",
"3_req_openai.json",
"4_req_target.json",
"5_res_provider.txt",
]);
exports.translatorDetectSchema = zod_1.z.object({
body: nonEmptyJsonRecordSchema,
});
exports.translatorSaveSchema = zod_1.z.object({
file: translatorLogFileSchema,
content: zod_1.z.string().min(1, "Content is required").max(1000000, "Content is too large"),
});
exports.translatorSendSchema = zod_1.z.object({
provider: zod_1.z.string().trim().min(1, "Provider is required"),
body: nonEmptyJsonRecordSchema,
});
exports.translatorTranslateSchema = zod_1.z
.object({
step: zod_1.z.union([zod_1.z.number().int().min(1).max(4), zod_1.z.literal("direct")]),
provider: zod_1.z.string().trim().min(1).optional(),
body: nonEmptyJsonRecordSchema,
sourceFormat: zod_1.z.string().optional(),
targetFormat: zod_1.z.string().optional(),
})
.superRefine(function (value, ctx) {
if (value.step !== "direct" && !value.provider) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "Step and provider are required",
path: ["provider"],
});
}
});
exports.oauthExchangeSchema = zod_1.z.object({
code: zod_1.z.string().trim().min(1),
redirectUri: zod_1.z.string().trim().min(1),
codeVerifier: zod_1.z.string().trim().min(1),
state: zod_1.z.string().optional(),
});
exports.oauthPollSchema = zod_1.z.object({
deviceCode: zod_1.z.string().trim().min(1),
codeVerifier: zod_1.z.string().optional(),
extraData: zod_1.z.unknown().optional(),
});
exports.cursorImportSchema = zod_1.z.object({
accessToken: zod_1.z.string().trim().min(1, "Access token is required"),
machineId: zod_1.z.string().trim().min(1, "Machine ID is required"),
});
exports.kiroImportSchema = zod_1.z.object({
refreshToken: zod_1.z.string().trim().min(1, "Refresh token is required"),
});
exports.kiroSocialExchangeSchema = zod_1.z.object({
code: zod_1.z.string().trim().min(1, "Code is required"),
codeVerifier: zod_1.z.string().trim().min(1, "Code verifier is required"),
provider: zod_1.z.enum(["google", "github"]),
});
exports.cloudCredentialUpdateSchema = zod_1.z.object({
provider: zod_1.z.string().trim().min(1, "Provider is required"),
credentials: zod_1.z
.object({
accessToken: zod_1.z.string().optional(),
refreshToken: zod_1.z.string().optional(),
expiresIn: zod_1.z.coerce.number().positive().optional(),
})
.strict()
.superRefine(function (value, ctx) {
if (
value.accessToken === undefined &&
value.refreshToken === undefined &&
value.expiresIn === undefined
) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "At least one credential field must be provided",
path: [],
});
}
}),
});
exports.cloudResolveAliasSchema = zod_1.z.object({
alias: zod_1.z.string().trim().min(1, "Missing alias"),
});
exports.cloudModelAliasUpdateSchema = zod_1.z.object({
model: zod_1.z.string().trim().min(1, "Model and alias required"),
alias: zod_1.z.string().trim().min(1, "Model and alias required"),
});
exports.cloudSyncActionSchema = zod_1.z.object({
action: zod_1.z.enum(["enable", "sync", "disable"]),
});
exports.updateComboSchema = zod_1.z
.object({
name: zod_1.z
.string()
.min(1, "Name is required")
.max(100)
.regex(/^[a-zA-Z0-9_/.-]+$/, "Name can only contain letters, numbers, -, _, / and .")
.optional(),
models: zod_1.z.array(comboModelEntry).optional(),
strategy: comboStrategySchema.optional(),
config: comboRuntimeConfigSchema.optional(),
isActive: zod_1.z.boolean().optional(),
})
.superRefine(function (value, ctx) {
if (
value.name === undefined &&
value.models === undefined &&
value.strategy === undefined &&
value.config === undefined &&
value.isActive === undefined
) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
});
exports.testComboSchema = zod_1.z.object({
comboName: zod_1.z.string().trim().min(1, "comboName is required"),
});
exports.dbBackupRestoreSchema = zod_1.z.object({
backupId: zod_1.z.string().trim().min(1, "backupId is required"),
});
exports.evalRunSuiteSchema = zod_1.z.object({
suiteId: zod_1.z.string().trim().min(1, "suiteId is required"),
outputs: zod_1.z.record(zod_1.z.string(), zod_1.z.unknown()),
});
exports.updateKeyPermissionsSchema = zod_1.z
.object({
allowedModels: zod_1.z.array(zod_1.z.string().trim().min(1)).max(1000).optional(),
noLog: zod_1.z.boolean().optional(),
})
.superRefine(function (value, ctx) {
if (value.allowedModels === undefined && value.noLog === undefined) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
});
exports.createProviderNodeSchema = zod_1.z
.object({
name: zod_1.z.string().trim().min(1, "Name is required"),
prefix: zod_1.z.string().trim().min(1, "Prefix is required"),
apiType: zod_1.z.enum(["chat", "responses"]).optional(),
baseUrl: zod_1.z.string().trim().min(1).optional(),
type: zod_1.z.enum(["openai-compatible", "anthropic-compatible"]).optional(),
})
.superRefine(function (value, ctx) {
var nodeType = value.type || "openai-compatible";
if (nodeType === "openai-compatible" && !value.apiType) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "Invalid OpenAI compatible API type",
path: ["apiType"],
});
}
});
exports.updateProviderNodeSchema = zod_1.z.object({
name: zod_1.z.string().trim().min(1, "Name is required"),
prefix: zod_1.z.string().trim().min(1, "Prefix is required"),
apiType: zod_1.z.enum(["chat", "responses"]).optional(),
baseUrl: zod_1.z.string().trim().min(1, "Base URL is required"),
});
exports.providerNodeValidateSchema = zod_1.z.object({
baseUrl: zod_1.z.string().trim().min(1, "Base URL and API key required"),
apiKey: zod_1.z.string().trim().min(1, "Base URL and API key required"),
type: zod_1.z.enum(["openai-compatible", "anthropic-compatible"]).optional(),
});
exports.updateProviderConnectionSchema = zod_1.z
.object({
name: zod_1.z.string().max(200).optional(),
priority: zod_1.z.coerce.number().int().min(1).max(100).optional(),
globalPriority: zod_1.z
.union([zod_1.z.coerce.number().int().min(1).max(100), zod_1.z.null()])
.optional(),
defaultModel: zod_1.z.union([zod_1.z.string().max(200), zod_1.z.null()]).optional(),
isActive: zod_1.z.boolean().optional(),
apiKey: zod_1.z.string().max(10000).optional(),
testStatus: zod_1.z.string().max(50).optional(),
lastError: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]).optional(),
lastErrorAt: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]).optional(),
lastErrorType: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]).optional(),
lastErrorSource: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]).optional(),
errorCode: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]).optional(),
rateLimitedUntil: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]).optional(),
lastTested: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]).optional(),
healthCheckInterval: zod_1.z.coerce.number().int().min(0).optional(),
})
.superRefine(function (value, ctx) {
if (Object.keys(value).length === 0) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "No valid fields to update",
path: [],
});
}
});
exports.providersBatchTestSchema = zod_1.z
.object({
mode: zod_1.z.enum(["provider", "oauth", "free", "apikey", "compatible", "all"]),
providerId: zod_1.z.string().trim().min(1).optional(),
})
.superRefine(function (value, ctx) {
if (value.mode === "provider" && !value.providerId) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "providerId is required when mode=provider",
path: ["providerId"],
});
}
});
exports.validateProviderApiKeySchema = zod_1.z.object({
provider: zod_1.z.string().trim().min(1, "Provider and API key required"),
apiKey: zod_1.z.string().trim().min(1, "Provider and API key required"),
});
var geminiPartSchema = zod_1.z
.object({
text: zod_1.z.string().optional(),
})
.catchall(zod_1.z.unknown());
var geminiContentSchema = zod_1.z
.object({
role: zod_1.z.string().optional(),
parts: zod_1.z.array(geminiPartSchema).optional(),
})
.catchall(zod_1.z.unknown());
exports.v1betaGeminiGenerateSchema = zod_1.z
.object({
contents: zod_1.z.array(geminiContentSchema).optional(),
systemInstruction: zod_1.z
.object({
parts: zod_1.z.array(geminiPartSchema).optional(),
})
.catchall(zod_1.z.unknown())
.optional(),
generationConfig: zod_1.z
.object({
stream: zod_1.z.boolean().optional(),
maxOutputTokens: zod_1.z.coerce.number().int().min(1).optional(),
temperature: zod_1.z.coerce.number().optional(),
topP: zod_1.z.coerce.number().optional(),
})
.catchall(zod_1.z.unknown())
.optional(),
})
.catchall(zod_1.z.unknown())
.superRefine(function (value, ctx) {
if (!value.contents && !value.systemInstruction) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "contents or systemInstruction is required",
path: [],
});
}
});
exports.cliMitmStartSchema = zod_1.z.object({
apiKey: zod_1.z.string().trim().min(1, "Missing apiKey"),
sudoPassword: zod_1.z.string().optional(),
});
exports.cliMitmStopSchema = zod_1.z.object({
sudoPassword: zod_1.z.string().optional(),
});
exports.cliMitmAliasUpdateSchema = zod_1.z.object({
tool: zod_1.z.string().trim().min(1, "tool and mappings required"),
mappings: zod_1.z.record(zod_1.z.string(), zod_1.z.string().optional()),
});
exports.cliBackupMutationSchema = zod_1.z
.object({
tool: zod_1.z.string().trim().min(1).optional(),
toolId: zod_1.z.string().trim().min(1).optional(),
backupId: zod_1.z.string().trim().min(1, "tool and backupId are required"),
})
.superRefine(function (value, ctx) {
if (!value.tool && !value.toolId) {
ctx.addIssue({
code: zod_1.z.ZodIssueCode.custom,
message: "tool and backupId are required",
path: ["tool"],
});
}
});
var envKeySchema = zod_1.z
.string()
.trim()
.min(1, "Environment key is required")
.max(120)
.regex(/^[A-Z_][A-Z0-9_]*$/, "Invalid environment key format");
var envValueSchema = zod_1.z
.union([zod_1.z.string(), zod_1.z.number(), zod_1.z.boolean()])
.transform(function (value) {
return String(value);
})
.refine(function (value) {
return value.length > 0;
}, "Environment value is required")
.refine(function (value) {
return value.length <= 10000;
}, "Environment value is too long");
exports.cliSettingsEnvSchema = zod_1.z.object({
env: zod_1.z.record(envKeySchema, envValueSchema).refine(function (value) {
return Object.keys(value).length > 0;
}, "env must contain at least one key"),
});
exports.cliModelConfigSchema = zod_1.z.object({
baseUrl: zod_1.z.string().trim().min(1, "baseUrl and model are required"),
apiKey: zod_1.z.string().optional(),
model: zod_1.z.string().trim().min(1, "baseUrl and model are required"),
});
exports.codexProfileNameSchema = zod_1.z.object({
name: zod_1.z.string().trim().min(1, "Profile name is required"),
});
exports.codexProfileIdSchema = zod_1.z.object({
profileId: zod_1.z.string().trim().min(1, "profileId is required"),
});
exports.guideSettingsSaveSchema = zod_1.z.object({
baseUrl: zod_1.z.string().trim().min(1).optional(),
apiKey: zod_1.z.string().optional(),
model: zod_1.z.string().trim().min(1, "Model is required"),
});
// ──── Helper ────
/**
* Parse and validate request body with a Zod schema.
* Returns { success: true, data } or { success: false, error }.
*/
function validateBody(schema, body) {
var _a;
var result = schema.safeParse(body);
if (result.success) {
return { success: true, data: result.data };
}
var issues = Array.isArray((_a = result.error) === null || _a === void 0 ? void 0 : _a.issues)
? result.error.issues
: [];
return {
success: false,
error: {
message: "Invalid request",
details: issues.map(function (e) {
return {
field: e.path.join("."),
message: e.message,
};
}),
},
};
}
+2
View File
@@ -300,6 +300,8 @@ export const providerModelMutationSchema = z.object({
modelId: z.string().trim().min(1, "modelId is required").max(240),
modelName: z.string().trim().max(240).optional(),
source: z.string().trim().max(80).optional(),
apiFormat: z.enum(["chat-completions", "responses"]).default("chat-completions"),
supportedEndpoints: z.array(z.enum(["chat", "embeddings", "images", "audio"])).default(["chat"]),
});
const pricingFieldsSchema = z
+30 -11
View File
@@ -31,7 +31,12 @@ import { sanitizeRequest } from "../../shared/utils/inputSanitizer";
// Pipeline integration — wired modules
import { getCircuitBreaker, CircuitBreakerOpenError } from "../../shared/utils/circuitBreaker";
import { isModelAvailable, setModelUnavailable } from "../../domain/modelAvailability";
import {
isModelAvailable,
setModelUnavailable,
clearModelUnavailability,
} from "../../domain/modelAvailability";
import { markAccountExhaustedFrom429 } from "../../domain/quotaCache";
import { RequestTelemetry, recordTelemetry } from "../../shared/utils/requestTelemetry";
import { generateRequestId } from "../../shared/utils/requestId";
import { recordCost } from "../../domain/costRules";
@@ -127,7 +132,10 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
telemetry.startPhase("policy");
const policy = await enforceApiKeyPolicy(request, modelStr);
if (policy.rejection) {
log.warn("POLICY", `API key policy rejected: ${modelStr} (key=${policy.apiKeyInfo?.id || "unknown"})`);
log.warn(
"POLICY",
`API key policy rejected: ${modelStr} (key=${policy.apiKeyInfo?.id || "unknown"})`
);
return policy.rejection;
}
const apiKeyInfo = policy.apiKeyInfo;
@@ -243,6 +251,13 @@ async function handleSingleModelChat(
const credentials = await getProviderCredentials(provider, excludeConnectionId);
if (!credentials || credentials.allRateLimited) {
if (lastStatus === 429 || lastStatus === 503) {
setModelUnavailable(provider, model, 60000, `HTTP ${lastStatus}`);
log.info(
"AVAILABILITY",
`${provider}/${model} marked unavailable — all accounts exhausted (HTTP ${lastStatus})`
);
}
return handleNoCredentials(
credentials,
excludeConnectionId,
@@ -296,22 +311,19 @@ async function handleSingleModelChat(
});
if (result.success) {
clearModelUnavailability(provider, model);
recordCostIfNeeded(apiKeyInfo, result);
if (telemetry) telemetry.startPhase("finalize");
if (telemetry) telemetry.endPhase();
return result.response;
}
// Pipeline: Mark model unavailable on repeated failures
if (result.status === 429 || result.status === 503) {
setModelUnavailable(provider, model, 60000, `HTTP ${result.status}`);
log.info(
"AVAILABILITY",
`${provider}/${model} marked unavailable for 60s (HTTP ${result.status})`
);
// 6. Mark account as quota-exhausted on 429 response
if (result.status === 429) {
markAccountExhaustedFrom429(credentials.connectionId, provider);
}
// 6. Fallback to next account
// 7. Fallback to next account
const { shouldFallback } = await markAccountUnavailable(
credentials.connectionId,
result.status,
@@ -357,7 +369,14 @@ async function resolveModelOrError(modelStr: string, body: any) {
const { provider, model } = modelInfo;
const sourceFormat = detectFormat(body);
const providerAlias = PROVIDER_ID_TO_ALIAS[provider] || provider;
const targetFormat = getModelTargetFormat(providerAlias, model) || getTargetFormat(provider);
// If the custom model specifies apiFormat="responses", override targetFormat
// to route through the Responses API translator instead of Chat Completions
let targetFormat = getModelTargetFormat(providerAlias, model) || getTargetFormat(provider);
if ((modelInfo as any).apiFormat === "responses") {
targetFormat = "openai-responses";
log.info("ROUTING", `Custom model apiFormat=responses → targetFormat=openai-responses`);
}
if (modelStr !== `${provider}/${model}`) {
log.info("ROUTING", `${modelStr}${provider}/${model}`);
+16 -3
View File
@@ -34,7 +34,12 @@ const HTTP_STATUS = {
* @param {Function} errorResponse - Error response factory
* @returns {Promise<{ error?: Response, provider: string, model: string, sourceFormat: string, targetFormat: string }>}
*/
export async function resolveModelOrError(modelStr: string, body: any, log: any, errorResponse: Function) {
export async function resolveModelOrError(
modelStr: string,
body: any,
log: any,
errorResponse: Function
) {
const modelInfo = await getModelInfo(modelStr);
if (!modelInfo.provider) {
@@ -44,7 +49,8 @@ export async function resolveModelOrError(modelStr: string, body: any, log: any,
`Ambiguous model '${modelStr}'. Use provider/model prefix (ex: gh/${modelStr} or cc/${modelStr}).`;
log.warn("CHAT", message, {
model: modelStr,
candidates: (modelInfo as any).candidateAliases || (modelInfo as any).candidateProviders || [],
candidates:
(modelInfo as any).candidateAliases || (modelInfo as any).candidateProviders || [],
});
return { error: errorResponse(HTTP_STATUS.BAD_REQUEST, message) };
}
@@ -56,7 +62,14 @@ export async function resolveModelOrError(modelStr: string, body: any, log: any,
const { provider, model } = modelInfo;
const sourceFormat = detectFormat(body);
const providerAlias = PROVIDER_ID_TO_ALIAS[provider] || provider;
const targetFormat = getModelTargetFormat(providerAlias, model) || getTargetFormat(provider);
// If the custom model specifies apiFormat="responses", override targetFormat
// to route through the Responses API translator instead of Chat Completions
let targetFormat = getModelTargetFormat(providerAlias, model) || getTargetFormat(provider);
if ((modelInfo as any).apiFormat === "responses") {
targetFormat = "openai-responses";
log.info("ROUTING", `Custom model apiFormat=responses → targetFormat=openai-responses`);
}
// Log routing
if (modelStr !== `${provider}/${model}`) {
+27 -13
View File
@@ -4,6 +4,7 @@ import {
updateProviderConnection,
getSettings,
} from "@/lib/localDb";
import { isAccountQuotaExhausted } from "@/domain/quotaCache";
import {
isAccountUnavailable,
getUnavailableUntil,
@@ -197,6 +198,19 @@ export async function getProviderCredentials(
return null;
}
// Quota-aware: prioritize accounts with available quota
const withQuota = availableConnections.filter((c) => !isAccountQuotaExhausted(c.id));
const exhaustedQuota = availableConnections.filter((c) => isAccountQuotaExhausted(c.id));
const orderedConnections =
withQuota.length > 0 ? [...withQuota, ...exhaustedQuota] : availableConnections;
if (exhaustedQuota.length > 0) {
log.debug(
"AUTH",
`${provider} | quota-aware: ${withQuota.length} with quota, ${exhaustedQuota.length} exhausted`
);
}
const settings = await getSettings();
const strategy = settings.fallbackStrategy || "fill-first";
@@ -205,7 +219,7 @@ export async function getProviderCredentials(
const stickyLimit = toNumber((settings as Record<string, unknown>).stickyRoundRobinLimit, 3);
// Sort by lastUsed (most recent first) to find current candidate
const byRecency = [...availableConnections].sort((a: any, b: any) => {
const byRecency = [...orderedConnections].sort((a: any, b: any) => {
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
if (!a.lastUsedAt) return 1;
if (!b.lastUsedAt) return -1;
@@ -225,7 +239,7 @@ export async function getProviderCredentials(
});
} else {
// Pick the least recently used (excluding current if possible)
const sortedByOldest = [...availableConnections].sort((a: any, b: any) => {
const sortedByOldest = [...orderedConnections].sort((a: any, b: any) => {
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
if (!a.lastUsedAt) return -1;
if (!b.lastUsedAt) return 1;
@@ -242,14 +256,14 @@ export async function getProviderCredentials(
}
} else if (strategy === "p2c") {
// Power of Two Choices: pick 2 random, choose the one with fewer failures
if (availableConnections.length <= 2) {
connection = availableConnections[0];
if (orderedConnections.length <= 2) {
connection = orderedConnections[0];
} else {
const i = Math.floor(Math.random() * availableConnections.length);
let j = Math.floor(Math.random() * (availableConnections.length - 1));
const i = Math.floor(Math.random() * orderedConnections.length);
let j = Math.floor(Math.random() * (orderedConnections.length - 1));
if (j >= i) j++;
const a = availableConnections[i];
const b = availableConnections[j];
const a = orderedConnections[i];
const b = orderedConnections[j];
// Prefer the one with fewer consecutive uses / better health
const scoreA = (a.consecutiveUseCount || 0) + (a.lastError ? 10 : 0);
const scoreB = (b.consecutiveUseCount || 0) + (b.lastError ? 10 : 0);
@@ -257,11 +271,11 @@ export async function getProviderCredentials(
}
} else if (strategy === "random") {
// Random: Fisher-Yates-inspired random pick
const idx = Math.floor(Math.random() * availableConnections.length);
connection = availableConnections[idx];
const idx = Math.floor(Math.random() * orderedConnections.length);
connection = orderedConnections[idx];
} else if (strategy === "least-used") {
// Least Used: pick the one with oldest lastUsedAt
const sorted = [...availableConnections].sort((a, b) => {
const sorted = [...orderedConnections].sort((a, b) => {
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
if (!a.lastUsedAt) return -1;
if (!b.lastUsedAt) return 1;
@@ -271,13 +285,13 @@ export async function getProviderCredentials(
} else if (strategy === "cost-optimized") {
// Cost Optimized: sort by priority ascending (lower = cheaper/preferred)
// Future: can be enhanced with actual cost data per provider
const sorted = [...availableConnections].sort(
const sorted = [...orderedConnections].sort(
(a, b) => (a.priority || 999) - (b.priority || 999)
);
connection = sorted[0];
} else {
// Default: fill-first (already sorted by priority in getProviderConnections)
connection = availableConnections[0];
connection = orderedConnections[0];
}
return {
+33 -4
View File
@@ -1,5 +1,5 @@
// Re-export from open-sse with localDb integration
import { getModelAliases, getComboByName, getProviderNodes } from "@/lib/localDb";
import { getModelAliases, getComboByName, getProviderNodes, getCustomModels } from "@/lib/localDb";
import {
parseModel,
resolveModelAliasFromMap,
@@ -16,13 +16,30 @@ export async function resolveModelAlias(alias) {
return resolveModelAliasFromMap(alias, aliases);
}
/**
 * Resolve the API format stored for a custom model.
 *
 * Fetches the provider's custom models via `getCustomModels` and inspects the
 * first entry whose `id` equals `modelId`.
 *
 * @param providerId - Provider whose custom models are fetched.
 * @param modelId - Model id to match against each entry's `id`.
 * @returns `"responses"` only when the matched entry has
 *   `apiFormat === "responses"`; `undefined` in every other case (no match,
 *   a non-array result, any other format, or any error thrown during lookup).
 */
async function lookupCustomModelApiFormat(
  providerId: string,
  modelId: string
): Promise<string | undefined> {
  try {
    const customModels = await getCustomModels(providerId);
    if (Array.isArray(customModels)) {
      const entries: any[] = customModels;
      for (const entry of entries) {
        if (entry.id === modelId) {
          // Only the first matching entry is consulted.
          if (entry?.apiFormat === "responses") {
            return "responses";
          }
          return undefined;
        }
      }
    }
    return undefined;
  } catch {
    // Best-effort lookup: any failure means "no override".
    return undefined;
  }
}
/**
* Get full model info (parse or resolve)
*/
export async function getModelInfo(modelStr) {
const parsed = parseModel(modelStr);
// Check custom provider nodes first (for both alias and non-alias formats)
// Check custom provider nodes first (for both alias and non-alias formats)
if (parsed.providerAlias || parsed.provider) {
// Ensure prefixToCheck is always a concise identifier, not a full model string
@@ -32,14 +49,26 @@ export async function getModelInfo(modelStr) {
const openaiNodes = await getProviderNodes({ type: "openai-compatible" });
const matchedOpenAI = openaiNodes.find((node) => node.prefix === prefixToCheck);
if (matchedOpenAI) {
return { provider: matchedOpenAI.id, model: parsed.model };
const apiFormat = await lookupCustomModelApiFormat(
matchedOpenAI.id as string,
parsed.model as string
);
return { provider: matchedOpenAI.id, model: parsed.model, ...(apiFormat && { apiFormat }) };
}
// Check Anthropic Compatible nodes
const anthropicNodes = await getProviderNodes({ type: "anthropic-compatible" });
const matchedAnthropic = anthropicNodes.find((node) => node.prefix === prefixToCheck);
if (matchedAnthropic) {
return { provider: matchedAnthropic.id, model: parsed.model };
const apiFormat = await lookupCustomModelApiFormat(
matchedAnthropic.id as string,
parsed.model as string
);
return {
provider: matchedAnthropic.id,
model: parsed.model,
...(apiFormat && { apiFormat }),
};
}
}