Compare commits
447 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1e9a9adbad | |||
| d87c7c3b8c | |||
| eb3c834609 | |||
| e53c76081f | |||
| 134316328c | |||
| 4767561f02 | |||
| 2d6b31b606 | |||
| a22f0a4e7b | |||
| 5a244aa12a | |||
| 69d28bec4d | |||
| c859665c6b | |||
| e7b19758f3 | |||
| 623c63baf6 | |||
| a3ad7c6c2e | |||
| afc9362ca5 | |||
| f6b125e8c2 | |||
| 5df3c22be8 | |||
| 11a0df5443 | |||
| e27a2a0d55 | |||
| dc8abe60ee | |||
| afe2ab37e4 | |||
| f7bd99f965 | |||
| f5238944b4 | |||
| c7ae9c30c2 | |||
| 82f7a12a46 | |||
| f494a8531b | |||
| 36ed0499db | |||
| 46cff2200d | |||
| 5ea6ad4a9e | |||
| 6cad4fae8e | |||
| 8df24c855b | |||
| f25882c0e9 | |||
| be6c769192 | |||
| a4276444b5 | |||
| 0af27b8d8a | |||
| 542eb0e719 | |||
| c658b39270 | |||
| 52ef3dfc7e | |||
| 57da407693 | |||
| d2d6fc5883 | |||
| 6a7a6022d4 | |||
| b53eafa615 | |||
| c949214e99 | |||
| 887cf25b65 | |||
| dd6142196f | |||
| 902c7244d1 | |||
| 4f11762c68 | |||
| 8a7f7c1ba0 | |||
| af46f87eed | |||
| fd749d1e0b | |||
| 5046f90dfa | |||
| cf13e95610 | |||
| 5763609008 | |||
| 6d672ab09a | |||
| ac68022233 | |||
| c2b31f6b20 | |||
| 54b1d8c8de | |||
| cd1ab696b2 | |||
| d9d0640f6e | |||
| e19046116a | |||
| 82a621ec08 | |||
| ce560ebe9d | |||
| f900a81ec9 | |||
| 2a620b178d | |||
| 5aaaad529b | |||
| 4e15ca6bbd | |||
| 8555a3a106 | |||
| 3a39ae6231 | |||
| 00e6b7cf2c | |||
| ebd1e5421b | |||
| 2feb2df30d | |||
| ba38dc738b | |||
| 9233edb6cd | |||
| f697c56922 | |||
| 5ab6a3b431 | |||
| 25cd0b3612 | |||
| 6e1bf4652d | |||
| 3991c96c78 | |||
| 70008e67e4 | |||
| 0d1e6685f6 | |||
| 60d1d13c6a | |||
| ade1e5f345 | |||
| 0a59ef4996 | |||
| ace3e47d22 | |||
| 9865401636 | |||
| 1343768de1 | |||
| 743cfcbb3c | |||
| 44ee62e391 | |||
| 47445ebf5c | |||
| 6b0638da30 | |||
| 74a0585683 | |||
| 9aaebbcd2a | |||
| 2348680a2b | |||
| 59a8b8db35 | |||
| 2ba46a622b | |||
| 9013236718 | |||
| eab96cc94b | |||
| 7f66e82f16 | |||
| c74054d928 | |||
| 5428c9b9a8 | |||
| db0bda2810 | |||
| c4efad3ea7 | |||
| 62995b640d | |||
| 9a476d310a | |||
| bc09eb64a6 | |||
| 8e4873d53e | |||
| 5874bd3de6 | |||
| a1dc2270a2 | |||
| 1c2f553a7c | |||
| 049ddff6fe | |||
| 9429d1639e | |||
| af7ca4baf7 | |||
| 7906fdfc1d | |||
| 9911272898 | |||
| 8909ae041e | |||
| c9a15772b0 | |||
| 91f3bd4056 | |||
| 2306081dab | |||
| 8963c62adb | |||
| e4a11bd6d0 | |||
| 2f6e63771f | |||
| cbd60c853e | |||
| 2d8091340f | |||
| 2025c16c82 | |||
| 811fb7f9b2 | |||
| a2bd32e76c | |||
| 89c07f4e8a | |||
| ac89069671 | |||
| 7cb420d8e6 | |||
| d3919d441f | |||
| 4b5824babc | |||
| fb87df14fd | |||
| da9e4e929b | |||
| 10b23b15ae | |||
| 30fba39b35 | |||
| 5a75ff67c9 | |||
| 358828b617 | |||
| e080c4a16a | |||
| 04b7e38baf | |||
| 7ee23fbe19 | |||
| c49bdb4ebb | |||
| 0f7efed8d5 | |||
| d07bc6dcf3 | |||
| d607d46fa3 | |||
| 2225dd14aa | |||
| f6c0e7bbbe | |||
| c4675c5219 | |||
| 2d977a3c4d | |||
| 9405918258 | |||
| a69d7dd4b5 | |||
| 428e6cb53f | |||
| c9a2955d28 | |||
| 7aefcd3437 | |||
| 79f4f79c46 | |||
| c11c275678 | |||
| bbcd1d3a08 | |||
| 3342d5b931 | |||
| f96ee44213 | |||
| bc53fe0cd9 | |||
| 97a67b5d3e | |||
| 1ffa58be76 | |||
| a5cf51c0b9 | |||
| 3d38cbf70f | |||
| 196a4e037c | |||
| 6a0760a2c5 | |||
| b0819404c7 | |||
| bfe495931f | |||
| 11bcdd810a | |||
| 228ebf436e | |||
| d2b6624de4 | |||
| 0dd6d349b3 | |||
| ecb0774b7b | |||
| 6ab32b351f | |||
| e09d4a02a2 | |||
| 3de8b4371a | |||
| 396ab2bab5 | |||
| 305fb56b62 | |||
| 0f22f38f7e | |||
| 084b206ae6 | |||
| 0d3728efa4 | |||
| 2b067c5d00 | |||
| 21135407af | |||
| c38a58fc98 | |||
| 20e4b1b011 | |||
| 9691469987 | |||
| 63114af08d | |||
| e78ede45b6 | |||
| 751ff77b7c | |||
| 5a53c17e81 | |||
| 9c32c30daf | |||
| baa0208fa9 | |||
| 2ec0cd13cd | |||
| 0d8f28a4a4 | |||
| 33dfbf0177 | |||
| 85c6b63c8f | |||
| d19f336286 | |||
| 052eb8d330 | |||
| 3510d8c0bc | |||
| 20860877b8 | |||
| bddec84f4e | |||
| aba12ad5db | |||
| 6ea8d094b2 | |||
| b5a3a3d019 | |||
| 5ecef5c90c | |||
| fe9d9a5a5c | |||
| e18cfe1d80 | |||
| 7eb45b2e19 | |||
| 70465ada4d | |||
| 8ddea153d3 | |||
| 8dca8fba6b | |||
| f21ba7df64 | |||
| ef917e42d1 | |||
| 865a1b9b2c | |||
| de8a0836a8 | |||
| b8272c55d7 | |||
| 8d93c13f9a | |||
| 8152b030bf | |||
| 9352ac767f | |||
| 5f20029ff7 | |||
| dbd00117c8 | |||
| 2902a0fe26 | |||
| 7ba57634c1 | |||
| 211dde25d0 | |||
| 57ff59aef2 | |||
| c39faba2b5 | |||
| 212bca2e1e | |||
| f807c56e31 | |||
| 5510c25040 | |||
| 9d884d2d60 | |||
| f26fa67374 | |||
| ccb314e065 | |||
| 0517dcf0b7 | |||
| b7f0665ce9 | |||
| 144628755d | |||
| 7c5bb2c6b6 | |||
| afadb0fea1 | |||
| b6c9c8a822 | |||
| 11f43ca65c | |||
| 8dce812a4d | |||
| 93047069b6 | |||
| c4f1990aff | |||
| 6e2816f08b | |||
| 227268024d | |||
| 8d5891a382 | |||
| 7700fca501 | |||
| 527c542d6d | |||
| 8fbae5e467 | |||
| 4d2a5efd12 | |||
| 5ffa14190a | |||
| 7820145cbe | |||
| b7a6c563ac | |||
| 52221488d0 | |||
| 4a1acb1446 | |||
| dc90211222 | |||
| 378c9f321d | |||
| e11bcc2848 | |||
| 3f10430150 | |||
| e8b72b54b3 | |||
| d902dda4b1 | |||
| 2538480b95 | |||
| b9b8c93cb9 | |||
| 68b7b35425 | |||
| 163c5feccc | |||
| 0488f0536e | |||
| 09e90ec25b | |||
| d97a11a54f | |||
| 4a779dfe3c | |||
| 61e09d545f | |||
| 3a68d7dabc | |||
| 22d318f201 | |||
| afa2cea678 | |||
| 6dce45505c | |||
| 014732788c | |||
| 0e75d838ab | |||
| 8383da8a50 | |||
| 199d173816 | |||
| f2829441f0 | |||
| 21137bd84a | |||
| a05e51a577 | |||
| 09a094629c | |||
| 90de0fbf68 | |||
| c9cdd5109b | |||
| 0e207dc5d2 | |||
| 876a5a98f4 | |||
| 8e82350d66 | |||
| de75ed1551 | |||
| 87266104a3 | |||
| fed8140404 | |||
| e4d83e91bb | |||
| a3153d893a | |||
| be219449f9 | |||
| 06d193f0d9 | |||
| 4f413615d9 | |||
| 2a79b833fb | |||
| 52e3d4b37b | |||
| d9b393a308 | |||
| 9a3d72c6a2 | |||
| 5b9b1cdd44 | |||
| d624ddde03 | |||
| d3ace8d611 | |||
| 177507bbc8 | |||
| d915e2a868 | |||
| a3d15cf971 | |||
| 5e72cd34f0 | |||
| b004d0472b | |||
| 6ed98fb21c | |||
| d8bf4b1db8 | |||
| fb2351ffe7 | |||
| 12f7d2b484 | |||
| 2c40ef0964 | |||
| ceb778a040 | |||
| 2c5a546759 | |||
| 3286f05b3b | |||
| 6a6a868845 | |||
| 56de009756 | |||
| deb855f9d4 | |||
| 7da0675907 | |||
| f8a22e71f7 | |||
| 5229290ac4 | |||
| 01c1bbfe29 | |||
| 234e14a30b | |||
| 0b0e503615 | |||
| e8be72ad28 | |||
| 3ca7b9eafa | |||
| 7feac7a158 | |||
| 3d7d02a10a | |||
| 7bab9e09d3 | |||
| c2a777580a | |||
| 952b0b22c7 | |||
| 4cfd1b94e1 | |||
| 1647005d6e | |||
| 369a0141de | |||
| cc97917ee2 | |||
| 0f49f82405 | |||
| 0d58a92479 | |||
| cde0a3bf4e | |||
| 344e602b26 | |||
| b941515c5a | |||
| fb597c677e | |||
| d0138a5037 | |||
| 2c2e0a95a1 | |||
| e0d5a78dd3 | |||
| 428a8490f8 | |||
| 96114bf92e | |||
| 77a3eece32 | |||
| 107b9e8cd2 | |||
| 187aba0514 | |||
| 2126b0ee8d | |||
| 113ac1c940 | |||
| fbaf30a6bf | |||
| 8abdf68718 | |||
| 67fa2592b5 | |||
| fed445c991 | |||
| 62069dac98 | |||
| 5a65585c16 | |||
| 4e3b363ba6 | |||
| f1d421bd8a | |||
| fb840d6392 | |||
| e659d2ee69 | |||
| ab1b0c890a | |||
| dbe6a4e30c | |||
| ce1e10c8c6 | |||
| 8caef4b688 | |||
| bc55911d0f | |||
| bc6b084c77 | |||
| 3d86ad7dc8 | |||
| 0844659e00 | |||
| e07edc663b | |||
| 5f38173387 | |||
| 481a630273 | |||
| 8592d02951 | |||
| 7f34835693 | |||
| 0ac264b39d | |||
| 87c7c83dd9 | |||
| 10d3120cdf | |||
| 27b9c331b7 | |||
| e1fe304dd3 | |||
| d226d68251 | |||
| a6014524ef | |||
| 1a98a6c966 | |||
| 0d13f4645c | |||
| 88d5986ac1 | |||
| f7fb68a798 | |||
| 5811e677f1 | |||
| 0d9a98c4e1 | |||
| 05d8d3d71d | |||
| f670a1e451 | |||
| 1591737528 | |||
| f74a007e27 | |||
| eb290a90cb | |||
| 9b80f723df | |||
| f2ace011ff | |||
| 0c0a56d4de | |||
| 981d163278 | |||
| 96cdd9bccb | |||
| 354d0b5f09 | |||
| 3d2de04dd1 | |||
| 93a220ba83 | |||
| d55b6e0b7a | |||
| 779957526b | |||
| f76482db87 | |||
| 1be20a4e2d | |||
| ff965234c9 | |||
| 62facba06f | |||
| 3c309f1fa4 | |||
| 090343aa01 | |||
| e674e5d87b | |||
| 2a90a05132 | |||
| a0af564b5a | |||
| ca2b1faa72 | |||
| bf49fdf0bf | |||
| c8989ddead | |||
| 4ea0426034 | |||
| 619c99ce4c | |||
| 86c566669c | |||
| 9aad413809 | |||
| 6afcebabab | |||
| ad1cc64e5a | |||
| 243cc4b60b | |||
| ddb02d6464 | |||
| f24abf074b | |||
| ff01e9edaa | |||
| 168b17adc7 | |||
| 7e0c6f0307 | |||
| dd573aed6f | |||
| b87af5d053 | |||
| 29b3e59d23 | |||
| ce6d7dc6bf | |||
| 19eeebae95 | |||
| 1dd05bffe8 | |||
| ac3d251a1a | |||
| 238e080928 | |||
| 7ed40c2139 | |||
| d2bee37e76 | |||
| 5a2fdacebe | |||
| 343e6c50e3 | |||
| 90d2dcac97 | |||
| 631ed4d97f | |||
| 9485985608 | |||
| b32db28a3d | |||
| 1516429b87 | |||
| 5668e16fbf | |||
| bd4a076942 | |||
| f0a0c97b5e | |||
| 7ffe21e23d | |||
| 4b137d8e72 | |||
| 6ba48241fe |
@@ -0,0 +1,76 @@
|
||||
---
|
||||
description: Deploy the latest OmniRoute code to the Akamai VPS (69.164.221.35) via npm
|
||||
---
|
||||
|
||||
# Deploy to VPS Workflow
|
||||
|
||||
Deploy OmniRoute to the production VPS using `npm install -g` + PM2.
|
||||
|
||||
**VPS:** `69.164.221.35` (Akamai, Ubuntu 24.04, 1GB RAM + 2.5GB swap)
|
||||
**Local VPS:** `192.168.0.15` (same setup)
|
||||
**Process manager:** PM2 (`omniroute`)
|
||||
**Port:** `20128`
|
||||
|
||||
> [!IMPORTANT]
|
||||
> PM2 runs from the global npm package at `/usr/lib/node_modules/omniroute`.
|
||||
> **DO NOT** use git clone or local copies. The `npm install -g` command handles
|
||||
> building, publishing, and installing the standalone app in one step.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Publish to npm
|
||||
|
||||
Ensure the version in `package.json` is bumped and the package is published:
|
||||
|
||||
```bash
|
||||
npm publish
|
||||
```
|
||||
|
||||
### 2. Install on VPS and restart PM2
|
||||
|
||||
// turbo-all
|
||||
|
||||
```bash
|
||||
ssh root@69.164.221.35 "npm install -g omniroute@latest && pm2 restart omniroute && pm2 save && echo '✅ Deploy complete!'"
|
||||
```
|
||||
|
||||
For the local VPS:
|
||||
|
||||
```bash
|
||||
ssh root@192.168.0.15 "npm install -g omniroute@latest && pm2 restart omniroute && pm2 save && echo '✅ Deploy complete!'"
|
||||
```
|
||||
|
||||
### 3. Verify the deployment
|
||||
|
||||
```bash
|
||||
ssh root@69.164.221.35 "pm2 list && cat \$(npm root -g)/omniroute/package.json | grep version | head -1 && curl -s -o /dev/null -w 'HTTP %{http_code}' http://localhost:20128/"
|
||||
```
|
||||
|
||||
Expected: PM2 shows `online`, version matches published, HTTP returns `307` (redirect to login).
|
||||
|
||||
## How it works
|
||||
|
||||
1. `npm publish` builds Next.js standalone + bundles everything into the npm package
|
||||
2. `npm install -g omniroute@latest` downloads and installs to `/usr/lib/node_modules/omniroute/`
|
||||
3. PM2 is registered to run `npm start` from that directory (cwd: `/usr/lib/node_modules/omniroute`)
|
||||
4. `pm2 restart omniroute` picks up the new code immediately
|
||||
|
||||
## PM2 Setup (one-time)
|
||||
|
||||
If PM2 needs to be reconfigured from scratch:
|
||||
|
||||
```bash
|
||||
ssh root@<VPS> "
|
||||
cd /usr/lib/node_modules/omniroute &&
|
||||
PORT=20128 pm2 start app/server.js --name omniroute --env PORT=20128 &&
|
||||
pm2 save &&
|
||||
pm2 startup
|
||||
"
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- The `.env` file is at `/usr/lib/node_modules/omniroute/.env`. Back it up before major npm updates.
|
||||
- PM2 is configured with `pm2 startup` to auto-restart on reboot.
|
||||
- Nginx proxies `omniroute.online` → `localhost:20128`.
|
||||
- The VPS has only 1GB RAM — builds happen locally via `npm publish`, not on the VPS.
|
||||
@@ -0,0 +1,110 @@
|
||||
---
|
||||
description: Create a new release, bump version up to 1.x.10 threshold, update changelog, and manage Pull Requests
|
||||
---
|
||||
|
||||
# Generate Release Workflow
|
||||
|
||||
Bump version, finalize CHANGELOG, commit, tag, push, publish to npm, and create GitHub release.
|
||||
|
||||
> **VERSION RULE: Always use PATCH bumps (2.x.y → 2.x.y+1)**
|
||||
> NEVER use `npm version minor` or `npm version major`.
|
||||
> Always use: `npm version patch --no-git-tag-version`
|
||||
> The threshold rule: when `y` reaches 10, bump to `2.(x+1).0` — e.g. `2.1.10` → `2.2.0`.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Determine new version
|
||||
|
||||
Check current version in `package.json` and increment the **patch** number only:
|
||||
|
||||
```bash
|
||||
grep '"version"' package.json
|
||||
```
|
||||
|
||||
Version format: `2.x.y` — examples:
|
||||
|
||||
- `2.1.2` → `2.1.3` (patch)
|
||||
- `2.1.9` → `2.1.10` (patch)
|
||||
- `2.1.10` → `2.2.0` (minor threshold — do manually with `sed`)
|
||||
|
||||
```bash
|
||||
# ALWAYS use patch:
|
||||
npm version patch --no-git-tag-version
|
||||
```
|
||||
|
||||
### 2. Regenerate lock file (REQUIRED after version bump)
|
||||
|
||||
**Mandatory** — skipping causes `@swc/helpers` lock mismatch and CI failures:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
### 3. Finalize CHANGELOG.md
|
||||
|
||||
Replace `[Unreleased]` header with the new version and date.
|
||||
Keep an empty `## [Unreleased]` section above it.
|
||||
|
||||
```markdown
|
||||
## [Unreleased]
|
||||
|
||||
---
|
||||
|
||||
## [2.x.y] — YYYY-MM-DD
|
||||
```
|
||||
|
||||
### 4. Update openapi.yaml version ⚠️ MANDATORY
|
||||
|
||||
> **CI will fail** if `docs/openapi.yaml` version ≠ `package.json` version (`check:docs-sync` enforces this).
|
||||
|
||||
// turbo
|
||||
|
||||
```bash
|
||||
VERSION=$(node -p "require('./package.json').version") && sed -i "s/ version: .*/ version: $VERSION/" docs/openapi.yaml && echo "✓ openapi.yaml → $VERSION"
|
||||
```
|
||||
|
||||
### 5. Stage, commit, and tag
|
||||
|
||||
// turbo-all
|
||||
|
||||
```bash
|
||||
git add package.json package-lock.json CHANGELOG.md docs/openapi.yaml
|
||||
git commit -m "chore(release): v2.x.y — summary of changes"
|
||||
git tag -a v2.x.y -m "Release v2.x.y"
|
||||
```
|
||||
|
||||
### 6. Push to GitHub
|
||||
|
||||
```bash
|
||||
git push origin main --tags
|
||||
```
|
||||
|
||||
### 7. Create GitHub release
|
||||
|
||||
```bash
|
||||
gh release create v2.x.y --title "v2.x.y — summary" --notes "..."
|
||||
```
|
||||
|
||||
### 8. Deploy to VPS (if requested)
|
||||
|
||||
See `/deploy-vps` workflow for Akamai VPS or use npm for local VPS:
|
||||
|
||||
```bash
|
||||
ssh root@<VPS_IP> "npm install -g omniroute@2.x.y && pm2 restart omniroute"
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- Always run `/update-docs` BEFORE this workflow (ensures CHANGELOG and README are current)
|
||||
- The `prepublishOnly` script runs `npm run build:cli` automatically during `npm publish`
|
||||
- After npm publish, verify with `npm info omniroute version`
|
||||
- Lock file sync errors are caused by skipping `npm install` after version bump
|
||||
|
||||
## Known CI Pitfalls
|
||||
|
||||
| CI failure | Cause | Fix |
|
||||
| ------------------------------------------------------------------------- | -------------------------------------------------------- | ---------------------------------------------------------------------- |
|
||||
| `[docs-sync] FAIL - OpenAPI version differs from package.json` | Skipped step 4 — `docs/openapi.yaml` version not updated | Run step 4 (`sed -i ...`) and commit |
|
||||
| `[docs-sync] FAIL - CHANGELOG.md first section must be "## [Unreleased]"` | `## [Unreleased]` missing or not at top of CHANGELOG | Add `## [Unreleased]\n\n---\n` before the first versioned `## [x.y.z]` |
|
||||
| Electron Linux `.deb` build fails (`FpmTarget` error) | `fpm` Ruby gem not installed on `ubuntu-latest` runner | Already fixed in `electron-release.yml` (`gem install fpm` step) |
|
||||
| Docker Hub `502 error writing layer blob` | Transient Docker Hub network error during ARM64 push | Re-run the Docker publish workflow; no code change needed |
|
||||
@@ -0,0 +1,131 @@
|
||||
---
|
||||
description: Analyze open feature request issues, implement viable ones on dedicated branches, and respond to authors
|
||||
---
|
||||
|
||||
# /implement-features — Feature Request Implementation Workflow
|
||||
|
||||
## Overview
|
||||
|
||||
Fetches open feature request issues, analyzes each against the current codebase, implements viable ones on dedicated branches, and responds to authors with results. Does NOT merge to main — leaves branches for author validation.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Identify the Repository
|
||||
|
||||
// turbo
|
||||
|
||||
- Run: `git -C <project_root> remote get-url origin` to extract owner/repo
|
||||
|
||||
### 2. Fetch Open Feature Request Issues
|
||||
|
||||
// turbo
|
||||
|
||||
- Run: `gh issue list --repo <owner>/<repo> --state open --limit 50 --json number,title,labels,body,comments,createdAt,author`
|
||||
- Filter for issues that are feature requests (label `enhancement`/`feature`, or body describes new functionality, or previously classified as feature request)
|
||||
- Sort by oldest first
|
||||
|
||||
### 3. Analyze Each Feature Request
|
||||
|
||||
For each feature request issue, perform a **two-level analysis**:
|
||||
|
||||
#### Level 1 — Viability Assessment
|
||||
|
||||
Ask yourself:
|
||||
|
||||
- Does this feature align with the project's goals and architecture?
|
||||
- Is the request technically feasible with the current codebase?
|
||||
- Does it duplicate existing functionality?
|
||||
- Would it introduce breaking changes or security risks?
|
||||
- Is there enough detail to implement it?
|
||||
|
||||
**Verdict options:**
|
||||
|
||||
1. ✅ **VIABLE** — Makes sense, enough detail to implement → Go to Level 2
|
||||
2. ❓ **NEEDS MORE INFO** — Good idea but insufficient detail → Post comment asking for specifics
|
||||
3. ❌ **NOT VIABLE** — Doesn't fit the project or is fundamentally flawed → Post comment explaining why, close issue
|
||||
|
||||
#### Level 2 — Implementation (only for VIABLE features)
|
||||
|
||||
1. **Research** — Read all related source files to understand the current architecture
|
||||
2. **Design** — Plan the implementation, filling gaps in the original request
|
||||
3. **Create branch** — Name format: `feat/issue-<NUMBER>-<short-slug>`
|
||||
```bash
|
||||
git checkout main
|
||||
git pull origin main
|
||||
git checkout -b feat/issue-<NUMBER>-<short-slug>
|
||||
```
|
||||
4. **Implement** — Build the complete solution following project patterns
|
||||
5. **Build** — Run `npm run build` to verify compilation
|
||||
6. **Commit** — Commit with: `feat: <description> (#<NUMBER>)`
|
||||
7. **Push** — Push the branch: `git push -u origin feat/issue-<NUMBER>-<short-slug>`
|
||||
8. **Return to main** — `git checkout main`
|
||||
|
||||
### 4. Respond to Authors
|
||||
|
||||
#### For VIABLE (implemented) features:
|
||||
|
||||
// turbo
|
||||
Post a comment on the issue:
|
||||
|
||||
````markdown
|
||||
## ✅ Feature Implemented!
|
||||
|
||||
Hi @<author>! We've analyzed your request and implemented it on a dedicated branch.
|
||||
|
||||
**Branch:** `feat/issue-<NUMBER>-<short-slug>`
|
||||
|
||||
### What was implemented:
|
||||
|
||||
- <bullet list of what was done>
|
||||
|
||||
### How to try it:
|
||||
|
||||
```bash
|
||||
git fetch origin
|
||||
git checkout feat/issue-<NUMBER>-<short-slug>
|
||||
npm install && npm run dev
|
||||
```
|
||||
````
|
||||
|
||||
### Next steps:
|
||||
|
||||
1. **Test it** — Please verify it works as you expected
|
||||
2. **Want to improve it?** — You're welcome to contribute! Just:
|
||||
```bash
|
||||
git checkout feat/issue-<NUMBER>-<short-slug>
|
||||
# Make your improvements
|
||||
git add -A && git commit -m "improve: <your changes>"
|
||||
git push origin feat/issue-<NUMBER>-<short-slug>
|
||||
```
|
||||
Then open a Pull Request from your branch to `main` 🎉
|
||||
3. **Not quite right?** — Let us know in this issue what needs to change
|
||||
|
||||
Looking forward to your feedback! 🚀
|
||||
|
||||
```
|
||||
|
||||
#### For NEEDS MORE INFO:
|
||||
// turbo
|
||||
Post a comment asking for specific missing details needed to implement, e.g.:
|
||||
- "Could you describe the exact behavior when X happens?"
|
||||
- "Which API endpoints should be affected?"
|
||||
- "Should this apply to all providers or only specific ones?"
|
||||
|
||||
Add the context of WHY you need each piece of information.
|
||||
|
||||
#### For NOT VIABLE:
|
||||
// turbo
|
||||
Post a polite comment explaining why the feature doesn't fit at this time:
|
||||
- If the idea is decent but timing is wrong: "This is an interesting idea, but it doesn't align with our current priorities. Feel free to open a new issue with more details if you'd like us to reconsider."
|
||||
- If fundamentally flawed: Explain the technical or architectural reasons why it won't work, suggest alternatives if possible.
|
||||
- Close the issue after posting the comment.
|
||||
|
||||
### 5. Summary Report
|
||||
Present a summary report to the user via `notify_user`:
|
||||
|
||||
| Issue | Title | Verdict | Branch / Action |
|
||||
|---|---|---|---|
|
||||
| #N | Title | ✅ Implemented | `feat/issue-N-slug` |
|
||||
| #N | Title | ❓ Needs Info | Comment posted |
|
||||
| #N | Title | ❌ Not Viable | Closed with explanation |
|
||||
```
|
||||
@@ -0,0 +1,50 @@
|
||||
---
|
||||
description: How to respond to GitHub issues with insufficient information
|
||||
---
|
||||
|
||||
# Issue Triage Workflow
|
||||
|
||||
Respond to GitHub issues that need more information before they can be investigated.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Identify issues needing triage
|
||||
|
||||
```bash
|
||||
gh issue list --state open --limit 20
|
||||
```
|
||||
|
||||
### 2. Evaluate each issue
|
||||
|
||||
Check if the issue has:
|
||||
|
||||
- Clear reproduction steps
|
||||
- Environment details (OS, Node.js version, OmniRoute version)
|
||||
- Error logs/screenshots
|
||||
- Expected vs actual behavior
|
||||
|
||||
### 3. Respond with triage template
|
||||
|
||||
For issues missing information:
|
||||
|
||||
```markdown
|
||||
Thank you for reporting this issue! To help us investigate, please provide:
|
||||
|
||||
1. **OmniRoute version**: (`omniroute --version`)
|
||||
2. **Node.js version**: (`node --version`)
|
||||
3. **Operating system**: (e.g., Ubuntu 24.04, macOS 15, Windows 11)
|
||||
4. **Installation method**: (npm, Docker, source)
|
||||
5. **Steps to reproduce**: (exact commands/actions that trigger the issue)
|
||||
6. **Error logs**: (paste relevant logs from the console)
|
||||
7. **Expected behavior**: (what should happen)
|
||||
|
||||
This will help us debug and resolve your issue faster. 🙏
|
||||
```
|
||||
|
||||
### 4. Label the issue
|
||||
|
||||
Add appropriate labels: `needs-info`, `bug`, `enhancement`, `question`, etc.
|
||||
|
||||
```bash
|
||||
gh issue edit <NUMBER> --add-label "needs-info"
|
||||
```
|
||||
@@ -0,0 +1,120 @@
|
||||
---
|
||||
description: Fetch all open GitHub issues, analyze bugs, resolve what's possible, triage the rest, wait for user validation, then commit and release
|
||||
---
|
||||
|
||||
# /resolve-issues — Automated Issue Resolution Workflow
|
||||
|
||||
## Overview
|
||||
|
||||
This workflow fetches all open issues from the project's GitHub repository, classifies them, analyzes bugs, resolves what can be fixed, and triages issues with insufficient information. **It does NOT merge or release automatically** — it creates a PR and waits for user validation before merging.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Identify the GitHub Repository
|
||||
|
||||
// turbo
|
||||
|
||||
- Run: `git -C <project_root> remote get-url origin` to extract the owner/repo
|
||||
- Parse the owner and repo name from the URL
|
||||
|
||||
### 2. Fetch All Open Issues
|
||||
|
||||
// turbo
|
||||
|
||||
- Run: `gh issue list --repo <owner>/<repo> --state open --limit 100 --json number,title,labels,body,comments,createdAt,author`
|
||||
- Parse the JSON output to get a list of all open issues
|
||||
- Sort by oldest first (FIFO)
|
||||
|
||||
### 3. Classify Each Issue
|
||||
|
||||
For each issue, determine its type:
|
||||
|
||||
- **Bug** — Has `bug` label, or body contains error messages, stack traces, "doesn't work", "broken", "crash", "error"
|
||||
- **Feature Request** — Has `enhancement`/`feature` label, or body describes new functionality
|
||||
- **Question** — Has `question` label, or is asking "how to" something
|
||||
- **Other** — Anything else
|
||||
|
||||
Focus ONLY on **Bugs** for resolution. Feature requests and questions should be skipped with a note in the final report.
|
||||
|
||||
### 4. Analyze Each Bug — For each bug issue:
|
||||
|
||||
#### 4a. Check Information Sufficiency
|
||||
|
||||
Verify the issue contains enough information to reproduce and fix:
|
||||
|
||||
- [ ] Clear description of the problem
|
||||
- [ ] Steps to reproduce
|
||||
- [ ] Error messages or logs
|
||||
- [ ] Expected vs actual behavior
|
||||
|
||||
#### 4b. If Information Is INSUFFICIENT
|
||||
|
||||
Call the `/issue-triage` workflow (located at `~/.gemini/antigravity/global_workflows/issue-triage.md`):
|
||||
// turbo
|
||||
|
||||
- Post a comment asking for more details using `gh issue comment`
|
||||
- Add `needs-info` label using `gh issue edit`
|
||||
- Mark this issue as **DEFERRED** and move to the next one
|
||||
|
||||
#### 4c. If Information Is SUFFICIENT
|
||||
|
||||
Proceed with resolution:
|
||||
|
||||
1. **Create a fix branch** — `git checkout -b fix/issue-<NUMBER>-<short-description>`
|
||||
2. **Research** — Search the codebase for files related to the issue
|
||||
3. **Root Cause** — Identify the root cause by reading the relevant source files
|
||||
4. **Implement Fix** — Apply the fix following existing code patterns and conventions
|
||||
5. **Test** — Build the project and run tests to verify the fix
|
||||
6. **Commit** — Commit with message format: `fix: <description> (#<issue_number>)`
|
||||
|
||||
### 5. Generate Report & Wait for Validation
|
||||
|
||||
Present a summary report to the user via `notify_user` with `BlockedOnUser: true`:
|
||||
|
||||
| Issue | Title | Status | Action |
|
||||
| ----- | ----- | ------------- | ----------------------------- |
|
||||
| #N | Title | ✅ Ready | Files changed (not committed) |
|
||||
| #N | Title | ❓ Needs Info | Triage comment posted |
|
||||
| #N | Title | ⏭️ Skipped | Feature request / not a bug |
|
||||
|
||||
> **⚠️ IMPORTANT**: Do NOT commit, close issues, or generate releases at this step.
|
||||
> Wait for the user to review the changes and respond with **OK** before proceeding.
|
||||
|
||||
- If the user says **OK** or approves → Proceed to step 6
|
||||
- If the user requests changes → Apply the requested adjustments first, then present the report again
|
||||
- If the user rejects → Revert the changes and stop
|
||||
|
||||
### 6. Commit & Push Fix Branch (only after user approval)
|
||||
|
||||
After the user validates:
|
||||
|
||||
- Commit each fix individually with message format: `fix: <description> (#<issue_number>)`
|
||||
- Push the fix branch: `git push origin fix/issue-<NUMBER>-<short-description>`
|
||||
- Create a PR: `gh pr create --title "fix: <description> (#<issue_number>)" --body "<details>" --base main`
|
||||
|
||||
### 7. 🛑 WAIT — Notify User & Await PR Verification
|
||||
|
||||
**This is a mandatory stop point.** Use `notify_user` with `BlockedOnUser: true`:
|
||||
|
||||
- Inform the user that the PR was created and is **awaiting their verification**
|
||||
- Include the PR number, URL, and a summary of what was changed
|
||||
- **DO NOT merge, close issues, generate releases, or deploy until the user confirms**
|
||||
|
||||
Wait for the user to respond:
|
||||
|
||||
- **User confirms** → Proceed to step 8
|
||||
- **User requests changes** → Apply changes, push to the same branch, notify again
|
||||
- **User rejects** → Close the PR and stop
|
||||
|
||||
### 8. Merge, Close Issues & Release (only after user confirms PR)
|
||||
|
||||
After the user confirms the PR:
|
||||
|
||||
1. **Merge** the PR: `gh pr merge <NUMBER> --merge --repo <owner>/<repo>` or via local merge
|
||||
2. **Close** resolved issues with a comment: `gh issue close <NUMBER> --repo <owner>/<repo> --comment "Fixed in <commit_hash>. The fix will be included in the next release."`
|
||||
3. **Switch to main**: `git checkout main && git pull`
|
||||
4. Run the `/update-docs` workflow (at `~/.gemini/antigravity/global_workflows/update-docs.md`) to update CHANGELOG and README
|
||||
5. Run the `/generate-release` workflow (at `.agents/workflows/generate-release.md`) to bump version, tag, and publish
|
||||
6. Deploy to local VPS: `ssh root@192.168.0.15 "npm install -g omniroute@<VERSION> && pm2 restart omniroute"`
|
||||
|
||||
If NO fixes were committed, skip this step and just present the report.
|
||||
@@ -0,0 +1,145 @@
|
||||
---
|
||||
description: Analyze open Pull Requests from the project's GitHub repository, generate a critical report, and optionally implement approved changes
|
||||
---
|
||||
|
||||
# /review-prs — PR Review & Analysis Workflow
|
||||
|
||||
## Overview
|
||||
|
||||
This workflow fetches all open PRs from the project's GitHub repository, performs a critical analysis of each one, generates a detailed report, and waits for user approval before proceeding with implementation. **All improvements are committed on top of the PR branch** and the user must verify before merge.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Identify the GitHub Repository
|
||||
|
||||
- Read `package.json` to get the repository URL, or use the git remote origin URL
|
||||
// turbo
|
||||
- Run: `git -C <project_root> remote get-url origin` to extract the owner/repo
|
||||
|
||||
### 2. Fetch Open Pull Requests
|
||||
|
||||
- Navigate to `https://github.com/<owner>/<repo>/pulls` and scrape all open PRs
|
||||
- For each open PR, collect:
|
||||
- PR number, title, author, branch, number of commits, date
|
||||
- PR description/body
|
||||
- Files changed (diff)
|
||||
- Existing review comments (from bots or humans)
|
||||
|
||||
### 3. Analyze Each PR — For each open PR, perform the following analysis:
|
||||
|
||||
#### 3a. Feature Assessment
|
||||
|
||||
- **Does it make sense?** Evaluate if the feature fills a real gap or solves a valid problem
|
||||
- **Alignment** — Check if it aligns with the project's architecture and roadmap
|
||||
- **Complexity** — Assess if the scope is reasonable or if it should be split
|
||||
|
||||
#### 3b. Code Quality Review
|
||||
|
||||
- Check for code duplication
|
||||
- Evaluate error handling patterns (consistent with existing codebase?)
|
||||
- Check naming conventions and code style
|
||||
- Verify TypeScript types (any `any` usage, missing types?)
|
||||
|
||||
#### 3c. Security Review
|
||||
|
||||
- Check for missing authentication/authorization on new endpoints
|
||||
- Check for injection vulnerabilities (URL params, SQL, XSS)
|
||||
- Verify input validation on all user-controlled data
|
||||
- Check for hardcoded secrets or credentials
|
||||
|
||||
#### 3d. Architecture Review
|
||||
|
||||
- Does the change follow existing patterns?
|
||||
- Are there any breaking changes to public APIs?
|
||||
- Is the database schema affected? Migration needed?
|
||||
- Impact on performance (N+1 queries, missing indexes?)
|
||||
|
||||
#### 3e. Test Coverage
|
||||
|
||||
- Does the PR include tests?
|
||||
- Are edge cases covered?
|
||||
- Would existing tests break?
|
||||
|
||||
#### 3f. Cross-Layer (Global) Analysis
|
||||
|
||||
Perform a **global impact assessment** to verify whether the PR changes are complete across all layers of the application:
|
||||
|
||||
- **Backend → Frontend check**: If the PR adds or modifies backend-only resources (new endpoints, services, data models), evaluate whether corresponding frontend changes are missing:
|
||||
- Does a new endpoint require a new screen/page in the dashboard?
|
||||
- Should there be a new action button, menu item, or navigation link?
|
||||
- Are there new data fields that should be displayed or editable in the UI?
|
||||
- Does a new feature need a toggle, configuration panel, or status indicator?
|
||||
- **Frontend → Backend check**: If the PR adds frontend elements, verify the backend support exists:
|
||||
- Are the required API endpoints implemented?
|
||||
- Is the data model sufficient for the new UI components?
|
||||
- **Cross-cutting concerns**: Check shared layers (types, DTOs, validation schemas, routes, middleware) for completeness
|
||||
- **Document gaps** — If missing layers are detected, list them as **IMPORTANT** issues in the report with concrete suggestions for what should be added
|
||||
|
||||
### 4. Generate Report — Create a markdown report for each PR including:
|
||||
|
||||
- **PR Summary** — What it does, files affected, commit count
|
||||
- **Improvements/Benefits** — Numbered list with impact level (HIGH/MEDIUM/LOW)
|
||||
- **Risks & Issues** — Categorized as CRITICAL / IMPORTANT / MINOR
|
||||
- **Scoring Table** — Rate across: Feature Relevance, Code Quality, Security, Robustness, Tests
|
||||
- **Verdict** — Ready to merge? With mandatory vs optional fixes
|
||||
- **Next Steps** — What will happen if approved
|
||||
|
||||
### 5. Present to User
|
||||
|
||||
- Show the report via `notify_user` with `BlockedOnUser: true`
|
||||
- Wait for user decision:
|
||||
- **Approved** → Proceed to step 6
|
||||
- **Approved with changes** → Implement the fixes and corrections before merging
|
||||
- **Rejected** → Close the PR or leave a review comment
|
||||
|
||||
### 6. Implementation (if approved)
|
||||
|
||||
- Checkout the PR branch: `gh pr checkout <NUMBER>`
|
||||
- Implement any required fixes identified in the analysis
|
||||
- If the Cross-Layer Analysis (3f) identified missing frontend/backend counterparts, implement them
|
||||
- **Commit improvements on top of the PR branch** with descriptive commit messages
|
||||
- Run the project's test suite to verify nothing breaks
|
||||
// turbo
|
||||
- Run: `npm test` or equivalent test command
|
||||
- Build the project to verify compilation
|
||||
// turbo
|
||||
- Run: `npm run build` or equivalent build command
|
||||
- Push the updated branch: `git push origin <branch-name>`
|
||||
|
||||
### 7. 🛑 WAIT — Notify User & Await PR Verification
|
||||
|
||||
**This is a mandatory stop point.** Use `notify_user` with `BlockedOnUser: true`:
|
||||
|
||||
- Inform the user that the PR has been **improved and pushed**, and is **awaiting their verification**
|
||||
- Include:
|
||||
- PR number and URL
|
||||
- Summary of improvements/fixes applied
|
||||
- Build/test status
|
||||
- List of files changed
|
||||
- **DO NOT merge, generate releases, or deploy until the user confirms**
|
||||
|
||||
Wait for the user to respond:
|
||||
|
||||
- **User confirms** → Proceed to step 8
|
||||
- **User requests more changes** → Apply changes, push to the same branch, notify again
|
||||
- **User rejects** → Leave a review comment and stop
|
||||
|
||||
### 8. Thank the Contributor
|
||||
|
||||
- Post a **thank-you comment** on the PR via the GitHub API
|
||||
- The message should:
|
||||
- Thank the author by name/username for their contribution
|
||||
- Briefly mention what the PR accomplishes and any improvements applied
|
||||
- Be friendly, professional, and encouraging
|
||||
- Example: _"Thanks @author for this great contribution! 🎉 The [feature/fix] is now merged and will be part of the next release. We appreciate your effort!"_
|
||||
|
||||
### 9. Merge & Release (only after user confirms PR)
|
||||
|
||||
After the user confirms the PR:
|
||||
|
||||
1. **Merge** the PR into main (local merge with `--no-ff` or via `gh pr merge`)
|
||||
2. **Push** to main: `git push origin main`
|
||||
3. **Clean up** the feature branch: `git branch -d <branch-name>`
|
||||
4. **Update CHANGELOG.md** with the new feature/fix
|
||||
5. Run the `/generate-release` workflow (at `.agents/workflows/generate-release.md`) to bump version, tag, and publish
|
||||
6. Deploy to local VPS: `ssh root@192.168.0.15 "npm install -g omniroute@<VERSION> && pm2 restart omniroute"`
|
||||
@@ -0,0 +1,105 @@
|
||||
---
|
||||
description: How to automatically summarize recent changes and update README and CHANGELOG
|
||||
---
|
||||
|
||||
# Update Documentation Workflow
|
||||
|
||||
Update CHANGELOG.md, README.md, docs/ files, and all multi-language translations whenever features are added or changed.
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Summarize recent changes
|
||||
|
||||
Review git log and identify new features, fixes, or changes since the last release tag:
|
||||
|
||||
```bash
|
||||
git log $(git describe --tags --abbrev=0)..HEAD --oneline
|
||||
```
|
||||
|
||||
### 2. Update English CHANGELOG.md
|
||||
|
||||
Add an `[Unreleased]` section (or version header if releasing) with:
|
||||
|
||||
- `### ✨ New Features` — each feature as a bullet point
|
||||
- `### 🐛 Bug Fixes` — if applicable
|
||||
- `### 🧪 Tests` — test count changes
|
||||
- `### 📁 New Files` — table of new files with purpose
|
||||
|
||||
### 3. Update English README.md
|
||||
|
||||
Update the feature tables in these sections:
|
||||
|
||||
- **🧠 Routing & Intelligence** — for routing/model features
|
||||
- **🛡️ Resilience & Security** — for security/resilience features
|
||||
- **📊 Observability & Analytics** — for monitoring features
|
||||
- **☁️ Deploy & Sync** — for deployment features
|
||||
|
||||
### 4. Update docs/ files
|
||||
|
||||
- `docs/FEATURES.md` — update the Settings section description
|
||||
- `docs/API_REFERENCE.md` — add new API routes if any
|
||||
- `docs/ARCHITECTURE.md` — update architecture if structural changes
|
||||
|
||||
### 5. 🌐 Sync Multi-Language Documentation (CRITICAL)
|
||||
|
||||
// turbo-all
|
||||
|
||||
**This step MUST be run after every README or docs update.**
|
||||
|
||||
The project has **30 language versions** of documentation:
|
||||
|
||||
**README files (root directory):**
|
||||
|
||||
```
|
||||
README.md (English - source of truth)
|
||||
README.pt-BR.md README.pt.md README.es.md README.fr.md README.it.md
|
||||
README.de.md README.nl.md README.sv.md README.no.md README.da.md README.fi.md
|
||||
README.ru.md README.uk-UA.md README.bg.md README.sk.md README.pl.md README.ro.md README.hu.md
|
||||
README.ar.md README.he.md README.th.md README.in.md README.id.md README.ms.md README.vi.md
|
||||
README.ja.md README.ko.md README.zh-CN.md README.phi.md
|
||||
```
|
||||
|
||||
**docs/i18n/ directories (29 languages):**
|
||||
|
||||
```
|
||||
docs/i18n/{ar,bg,da,de,es,fi,fr,he,hu,id,in,it,ja,ko,ms,nl,no,phi,pl,pt,pt-BR,ro,ru,sk,sv,th,uk-UA,vi,zh-CN}/
|
||||
Each contains: API_REFERENCE.md, ARCHITECTURE.md, CODEBASE_DOCUMENTATION.md, FEATURES.md, TROUBLESHOOTING.md, USER_GUIDE.md
|
||||
```
|
||||
|
||||
**Sync approach for feature table updates:**
|
||||
|
||||
a. Identify which feature table rows were added to English README.md
|
||||
b. For each translated README, find the corresponding anchor lines:
|
||||
|
||||
- **Routing section:** Find the `💬` (System Prompt) table row — the line before it is always the last routing feature. Insert new routing features before System Prompt.
|
||||
- **Resilience section:** Find the `📊` Rate Limits table row (the one in lines 590-600, NOT the quota tracking one in lines 560-570). Insert new resilience features after it.
|
||||
c. The new feature entries can stay in English for technical features, matching the pattern used in the existing translations.
|
||||
d. Use `sed` or similar tool to batch-insert across all 29 translated READMEs.
|
||||
|
||||
**Verification:**
|
||||
|
||||
```bash
|
||||
# Verify all READMEs have the new features
|
||||
grep -l "NEW_FEATURE_NAME" README.*.md | wc -l
|
||||
# Should return 30 (all language versions)
|
||||
```
|
||||
|
||||
**FEATURES.md sync:**
|
||||
|
||||
```bash
|
||||
# Update Settings description in all docs/i18n/*/FEATURES.md
|
||||
for dir in docs/i18n/*/; do
|
||||
# Update the Settings section description to mention new features
|
||||
# Check FEATURES.md in each directory
|
||||
done
|
||||
```
|
||||
|
||||
### 6. Verify documentation changes
|
||||
|
||||
```bash
|
||||
# Check all modified files
|
||||
git status --short
|
||||
|
||||
# Verify no broken markdown
|
||||
# Optional: run markdownlint if available
|
||||
```
|
||||
@@ -23,7 +23,15 @@ SQLITE_MAX_SIZE_MB=2048
|
||||
SQLITE_CLEAN_LEGACY_FILES=true
|
||||
|
||||
# Recommended runtime variables
|
||||
# Canonical/base port (keeps backward compatibility)
|
||||
PORT=20128
|
||||
# Optional split ports:
|
||||
# API_PORT=20129
|
||||
# API_HOST=0.0.0.0
|
||||
# DASHBOARD_PORT=20128
|
||||
# Optional Docker production host publish ports:
|
||||
# PROD_DASHBOARD_PORT=20130
|
||||
# PROD_API_PORT=20131
|
||||
NODE_ENV=production
|
||||
INSTANCE_NAME=omniroute
|
||||
|
||||
@@ -122,6 +130,22 @@ GEMINI_CLI_OAUTH_CLIENT_SECRET=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
|
||||
# IFLOW_OAUTH_CLIENT_ID=
|
||||
IFLOW_OAUTH_CLIENT_SECRET=4Z3YjXycVsQvyGF1etiNlIBB4RsqSDtW
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Provider User-Agent Overrides (optional — customize per-provider UA headers)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Format: {PROVIDER_ID}_USER_AGENT=custom-value
|
||||
# When set, overrides the default User-Agent header sent to that provider.
|
||||
# Useful when providers update versions or block old user-agents.
|
||||
CLAUDE_USER_AGENT=claude-cli/1.0.83 (external, cli)
|
||||
CODEX_USER_AGENT=codex-cli/0.92.0 (Windows 10.0.26100; x64)
|
||||
GITHUB_USER_AGENT=GitHubCopilotChat/0.26.7
|
||||
ANTIGRAVITY_USER_AGENT=antigravity/1.104.0 darwin/arm64
|
||||
KIRO_USER_AGENT=AWS-SDK-JS/3.0.0 kiro-ide/1.0.0
|
||||
IFLOW_USER_AGENT=iFlow-Cli
|
||||
QWEN_USER_AGENT=google-api-nodejs-client/9.15.1
|
||||
CURSOR_USER_AGENT=connect-es/1.6.1
|
||||
GEMINI_CLI_USER_AGENT=google-api-nodejs-client/9.15.1
|
||||
|
||||
# API Key Providers (Phase 1 + Phase 4)
|
||||
# Add via Dashboard → Providers → Add API Key, or set here
|
||||
# DEEPSEEK_API_KEY=
|
||||
@@ -153,3 +177,24 @@ LOG_TO_FILE=true
|
||||
# LOG_FILE_PATH=logs/application/app.log
|
||||
# LOG_MAX_FILE_SIZE=50M
|
||||
# LOG_RETENTION_DAYS=7
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Memory Optimization (Low-RAM configurations)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Node.js heap limit in MB (default: 256 for Docker, system default for npm)
|
||||
# OMNIROUTE_MEMORY_MB=256
|
||||
|
||||
# Prompt cache settings
|
||||
# PROMPT_CACHE_MAX_SIZE=50
|
||||
# PROMPT_CACHE_MAX_BYTES=2097152
|
||||
# PROMPT_CACHE_TTL_MS=300000
|
||||
|
||||
# Semantic cache settings (temperature=0 responses)
|
||||
# SEMANTIC_CACHE_MAX_SIZE=100
|
||||
# SEMANTIC_CACHE_MAX_BYTES=4194304
|
||||
# SEMANTIC_CACHE_TTL_MS=1800000
|
||||
|
||||
# In-memory log buffers
|
||||
# PROXY_LOG_MAX_ENTRIES=200
|
||||
# CALL_LOGS_MAX=200
|
||||
# STREAM_HISTORY_MAX=50
|
||||
|
||||
@@ -10,6 +10,9 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Lint
|
||||
@@ -22,6 +25,12 @@ jobs:
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- run: npm run lint
|
||||
- run: npm run check:cycles
|
||||
- run: npm run check:route-validation:t06
|
||||
- run: npm run check:any-budget:t11
|
||||
- run: npm run check:docs-sync
|
||||
- run: npm run typecheck:core
|
||||
- run: npm run typecheck:noimplicit:core
|
||||
|
||||
security:
|
||||
name: Security Audit
|
||||
@@ -127,7 +136,6 @@ jobs:
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- run: npm run test:integration
|
||||
continue-on-error: true
|
||||
|
||||
test-security:
|
||||
name: Security Tests
|
||||
@@ -144,4 +152,3 @@ jobs:
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- run: npm run test:security
|
||||
continue-on-error: true
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
name: Codex PR Review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize]
|
||||
|
||||
jobs:
|
||||
request-codex-review:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Request Codex Review
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.payload.pull_request.number,
|
||||
body: '@codex review'
|
||||
});
|
||||
@@ -0,0 +1,40 @@
|
||||
name: Deploy to VPS
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Publish to Docker Hub"]
|
||||
types: [completed]
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
if: >-
|
||||
(github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success')
|
||||
&& vars.DEPLOY_ENABLED == 'true'
|
||||
name: Deploy OmniRoute to VPS
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Deploy via SSH
|
||||
uses: appleboy/ssh-action@v1
|
||||
continue-on-error: true
|
||||
with:
|
||||
host: ${{ secrets.VPS_HOST }}
|
||||
username: ${{ secrets.VPS_USER }}
|
||||
key: ${{ secrets.VPS_SSH_KEY }}
|
||||
port: 22
|
||||
timeout: 30s
|
||||
command_timeout: 5m
|
||||
script: |
|
||||
echo "=== Updating OmniRoute ==="
|
||||
npm install -g omniroute@latest 2>&1
|
||||
INSTALLED_VERSION=$(omniroute --version 2>/dev/null || echo "unknown")
|
||||
echo "Installed version: $INSTALLED_VERSION"
|
||||
|
||||
echo "=== Restarting PM2 ==="
|
||||
pm2 restart omniroute || pm2 start omniroute --name omniroute -- --port 20128
|
||||
pm2 save
|
||||
|
||||
echo "=== Health Check ==="
|
||||
sleep 3
|
||||
curl -sf http://localhost:20128/api/settings > /dev/null && echo "✅ OmniRoute is healthy" || echo "❌ Health check failed"
|
||||
echo "=== Deploy complete ==="
|
||||
@@ -9,41 +9,53 @@ permissions:
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
name: Build & Push Docker Image
|
||||
name: Build and Push Docker (multi-arch)
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
IMAGE_NAME: diegosouzapw/omniroute
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Set up QEMU (for multi-arch builds)
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Extract version from release tag
|
||||
id: version
|
||||
run: |
|
||||
VERSION="${GITHUB_REF_NAME}"
|
||||
VERSION="${VERSION#v}"
|
||||
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
echo "Publishing Docker image version: $VERSION"
|
||||
echo "Publishing Docker image: $IMAGE_NAME:$VERSION"
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
- name: Build and push multi-arch image
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: .
|
||||
target: runner-base
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: true
|
||||
tags: |
|
||||
diegosouzapw/omniroute:${{ steps.version.outputs.version }}
|
||||
diegosouzapw/omniroute:latest
|
||||
${{ env.IMAGE_NAME }}:${{ steps.version.outputs.version }}
|
||||
${{ env.IMAGE_NAME }}:latest
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
platforms: linux/amd64
|
||||
no-cache: false
|
||||
env:
|
||||
DOCKER_BUILDKIT_INLINE_CACHE: 1
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect "${{ env.IMAGE_NAME }}:${{ steps.version.outputs.version }}"
|
||||
|
||||
- name: Update Docker Hub description
|
||||
uses: peter-evans/dockerhub-description@v5
|
||||
|
||||
@@ -0,0 +1,203 @@
|
||||
name: Build Electron Desktop App
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: "Release version (e.g., v1.6.8)"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
name: Validate version
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.validate.outputs.version }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Validate version format
|
||||
id: validate
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "push" ]]; then
|
||||
VERSION="${GITHUB_REF#refs/tags/}"
|
||||
else
|
||||
VERSION="${{ inputs.version }}"
|
||||
fi
|
||||
|
||||
if [[ ! "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
|
||||
echo "Error: Invalid version format. Expected: v1.6.8"
|
||||
exit 1
|
||||
fi
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "✓ Valid version: $VERSION"
|
||||
|
||||
build:
|
||||
name: Build Electron (${{ matrix.platform }})
|
||||
needs: validate
|
||||
runs-on: ${{ matrix.runner }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- platform: windows
|
||||
runner: windows-latest
|
||||
target: win
|
||||
ext: .exe
|
||||
- platform: macos-intel
|
||||
runner: macos-15-intel
|
||||
target: mac-x64
|
||||
ext: .dmg
|
||||
- platform: macos-arm64
|
||||
runner: macos-latest
|
||||
target: mac-arm64
|
||||
ext: -arm64.dmg
|
||||
- platform: linux
|
||||
runner: ubuntu-latest
|
||||
target: linux
|
||||
ext: .AppImage
|
||||
deb_ext: .deb
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
|
||||
- name: Cache node_modules
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: node_modules
|
||||
key: ${{ runner.os }}-node-${{ hashFiles('package-lock.json') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-node-
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Build Next.js standalone
|
||||
env:
|
||||
JWT_SECRET: ci-build-secret-with-sufficient-length-for-validation
|
||||
run: npm run build
|
||||
|
||||
- name: Sync version in electron/package.json
|
||||
shell: bash
|
||||
run: |
|
||||
VERSION="${{ needs.validate.outputs.version }}"
|
||||
VERSION_NO_V="${VERSION#v}"
|
||||
node -e "
|
||||
const fs = require('fs');
|
||||
const pkg = JSON.parse(fs.readFileSync('electron/package.json'));
|
||||
pkg.version = '$VERSION_NO_V';
|
||||
fs.writeFileSync('electron/package.json', JSON.stringify(pkg, null, 2) + '\\n');
|
||||
"
|
||||
echo "✓ electron/package.json version set to $VERSION_NO_V"
|
||||
|
||||
- name: Install fpm (Linux .deb packaging tool)
|
||||
if: matrix.platform == 'linux'
|
||||
run: sudo gem install fpm --no-document
|
||||
|
||||
- name: Install Electron dependencies
|
||||
working-directory: electron
|
||||
run: npm install --no-audit --no-fund
|
||||
|
||||
- name: Build Electron for ${{ matrix.platform }}
|
||||
working-directory: electron
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: npm run build:${{ matrix.target }}
|
||||
|
||||
- name: Collect installers
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p release-assets
|
||||
cd electron/dist-electron
|
||||
# Copy only installer files for this platform
|
||||
for file in *${{ matrix.ext }}; do
|
||||
[ -f "$file" ] && cp "$file" ../../release-assets/
|
||||
done
|
||||
# Linux: also copy .deb package
|
||||
if [ "${{ matrix.platform }}" = "linux" ]; then
|
||||
for file in *.deb; do
|
||||
[ -f "$file" ] && cp "$file" ../../release-assets/
|
||||
done
|
||||
fi
|
||||
# Windows: also copy portable standalone exe as OmniRoute.exe
|
||||
if [ "${{ matrix.platform }}" = "windows" ]; then
|
||||
for file in *.exe; do
|
||||
# Skip the NSIS installer (contains "Setup")
|
||||
case "$file" in *Setup*) continue ;; esac
|
||||
[ -f "$file" ] && cp "$file" "../../release-assets/OmniRoute.exe" && break
|
||||
done
|
||||
fi
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: electron-${{ matrix.platform }}
|
||||
path: release-assets/
|
||||
|
||||
release:
|
||||
name: Create Release
|
||||
needs: [validate, build]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Download all artifacts
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
path: release-assets
|
||||
merge-multiple: true
|
||||
|
||||
- name: Create source archives
|
||||
run: |
|
||||
# Create source code archives (excluding dev dependencies and build artifacts)
|
||||
export TARBALL="OmniRoute-${{ needs.validate.outputs.version }}.source.tar.gz"
|
||||
export ZIPBALL="OmniRoute-${{ needs.validate.outputs.version }}.source.zip"
|
||||
|
||||
# Use git archive for clean source export
|
||||
git archive --format=tar.gz --prefix=OmniRoute-${{ needs.validate.outputs.version }}/ HEAD -o "release-assets/$TARBALL"
|
||||
git archive --format=zip --prefix=OmniRoute-${{ needs.validate.outputs.version }}/ HEAD -o "release-assets/$ZIPBALL"
|
||||
|
||||
echo "✓ Created source archives:"
|
||||
ls -lh "release-assets/$TARBALL" "release-assets/$ZIPBALL"
|
||||
|
||||
- name: List release files
|
||||
run: ls -la release-assets/
|
||||
|
||||
- name: Create Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ needs.validate.outputs.version }}
|
||||
draft: false
|
||||
prerelease: false
|
||||
generate_release_notes: true
|
||||
fail_on_unmatched_files: false
|
||||
files: |
|
||||
release-assets/*.dmg
|
||||
release-assets/*.exe
|
||||
release-assets/*.AppImage
|
||||
release-assets/*.deb
|
||||
release-assets/*.blockmap
|
||||
release-assets/*.source.tar.gz
|
||||
release-assets/*.source.zip
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -22,21 +22,30 @@ jobs:
|
||||
node-version: 22
|
||||
registry-url: https://registry.npmjs.org
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Build standalone app
|
||||
run: npm run build:cli
|
||||
- name: Install dependencies (skip scripts to avoid heavy build)
|
||||
run: npm install --ignore-scripts --no-audit --no-fund
|
||||
|
||||
- name: Sync version from release tag
|
||||
run: |
|
||||
VERSION="${GITHUB_REF_NAME}"
|
||||
# Remove 'v' prefix if present (v0.1.0 -> 0.1.0)
|
||||
# Remove 'v' prefix if present (v2.1.0 -> 2.1.0)
|
||||
VERSION="${VERSION#v}"
|
||||
npm version "$VERSION" --no-git-tag-version --allow-same-version
|
||||
echo "Publishing version: $VERSION"
|
||||
|
||||
- name: Build CLI bundle (standalone app)
|
||||
env:
|
||||
JWT_SECRET: ci-build-secret-with-sufficient-length-for-validation
|
||||
run: node scripts/prepublish.mjs
|
||||
|
||||
- name: Publish to npm
|
||||
run: npm publish --access public
|
||||
run: |
|
||||
VERSION=$(node -p "require('./package.json').version")
|
||||
# Check if this version is already published — skip instead of failing with E403
|
||||
if npm view "omniroute@${VERSION}" version --silent 2>/dev/null | grep -q "^${VERSION}$"; then
|
||||
echo "️⚠️ Version ${VERSION} is already published on npm — skipping."
|
||||
exit 0
|
||||
fi
|
||||
npm publish --access public
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
|
||||
+37
-6
@@ -1,7 +1,12 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# project-specific directories
|
||||
.omnivscodeagent/
|
||||
omnirouteCloud/
|
||||
omnirouteSite/
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
node_modules/
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
@@ -11,10 +16,10 @@
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
coverage/
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
@@ -63,6 +68,7 @@ docs/*
|
||||
!docs/TASK_NEBIUS_BACKEND_ENABLEMENT.md
|
||||
!docs/frontend-backend-provider-gap-report.md
|
||||
!docs/openapi.yaml
|
||||
!docs/RELEASE_CHECKLIST.md
|
||||
!docs/PLANO-IMPLANTACAO.md
|
||||
!docs/TASKS.md
|
||||
!docs/FASE-*.md
|
||||
@@ -74,6 +80,11 @@ docs/*
|
||||
!docs/VM_DEPLOYMENT_GUIDE.md
|
||||
!docs/FEATURES.md
|
||||
!docs/screenshots/
|
||||
!docs/i18n/
|
||||
!docs/i18n/**
|
||||
!docs/A2A-SERVER.md
|
||||
!docs/AUTO-COMBO.md
|
||||
!docs/MCP-SERVER.md
|
||||
|
||||
# open-sse tests
|
||||
open-sse/test/*
|
||||
@@ -86,13 +97,33 @@ test-results/
|
||||
playwright-report/
|
||||
blob-report/
|
||||
cloud/
|
||||
omnirouteCloud/
|
||||
omnirouteSite/
|
||||
|
||||
# Security Analysis (standalone project with own git)
|
||||
security-analysis/
|
||||
|
||||
# Deploy workflow (contains sensitive VPS credentials)
|
||||
.agent/workflows/deploy.md
|
||||
clipr/
|
||||
app.log
|
||||
*.tgz
|
||||
|
||||
# Backup directories
|
||||
app.__qa_backup/
|
||||
|
||||
# Production standalone build (created by scripts/prepublish.mjs)
|
||||
# Conflicts with Next.js App Router detection in dev (root app/ shadows src/app/)
|
||||
# npm publish still includes it via package.json "files" field
|
||||
/app/
|
||||
|
||||
# Electron (subproject dependency lock and build artifacts)
|
||||
electron/package-lock.json
|
||||
electron/dist-electron/
|
||||
electron/node_modules/
|
||||
icon.iconset/
|
||||
|
||||
# VS Code Extension (independent Git repo)
|
||||
vscode-extension/
|
||||
|
||||
# SQLite residual files
|
||||
*.sqlite-shm
|
||||
*.sqlite-wal
|
||||
*.sqlite-journal
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
Unified AI proxy/router — route any LLM through one endpoint. Multi-provider support
|
||||
(OpenAI, Anthropic, Gemini, DeepSeek, Groq, xAI, Mistral, Fireworks, Cohere, etc.)
|
||||
with **MCP Server** (16 tools for agent control) and **A2A v0.3 Protocol** (Agent-to-Agent orchestration).
|
||||
|
||||
## Stack
|
||||
|
||||
@@ -13,6 +14,7 @@ Unified AI proxy/router — route any LLM through one endpoint. Multi-provider s
|
||||
- **Streaming**: SSE via `open-sse` internal package
|
||||
- **Styling**: Tailwind CSS v4
|
||||
- **Docker**: Multi-stage Dockerfile, 3 profiles (base / cli / host)
|
||||
- **i18n**: next-intl with 30 languages (`src/i18n/messages/`)
|
||||
|
||||
## Architecture
|
||||
|
||||
@@ -47,6 +49,56 @@ but the real logic lives in `src/lib/db/`.
|
||||
|
||||
Translation between provider formats: `open-sse/translator/`
|
||||
|
||||
### MCP Server (`open-sse/mcp-server/`)
|
||||
|
||||
16 tools for AI agent control via **3 transport modes**:
|
||||
- **stdio** — Local IDE integration (Claude Desktop, Cursor, VS Code)
|
||||
- **SSE** — Remote Server-Sent Events at `/api/mcp/sse`
|
||||
- **Streamable HTTP** — Modern bidirectional HTTP at `/api/mcp/stream`
|
||||
|
||||
HTTP transports run in-process via `httpTransport.ts` singleton using `WebStandardStreamableHTTPServerTransport`.
|
||||
|
||||
| Category | Tools |
|
||||
| ---------- | ------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Essential | `get_health`, `list_combos`, `get_combo_metrics`, `switch_combo`, `check_quota`, `route_request`, `cost_report`, `list_models_catalog` |
|
||||
| Advanced | `simulate_route`, `set_budget_guard`, `set_resilience_profile`, `test_combo`, `get_provider_metrics`, `best_combo_for_task`, `explain_route`, `get_session_snapshot` |
|
||||
|
||||
- Scoped authorization (9 scopes), audit logging, Zod schemas
|
||||
- IDE configs for Claude Desktop, Cursor, VS Code Copilot
|
||||
|
||||
### A2A Server (`src/lib/a2a/`)
|
||||
|
||||
Agent-to-Agent v0.3 protocol:
|
||||
|
||||
- JSON-RPC 2.0: `message/send`, `message/stream`, `tasks/get`, `tasks/cancel`
|
||||
- Agent Card at `/.well-known/agent.json`
|
||||
- Skills: `smart-routing`, `quota-management`
|
||||
- SSE streaming with 15s heartbeat
|
||||
- Task Manager with state machine and TTL-based cleanup
|
||||
|
||||
### Auto-Combo Engine (`open-sse/services/autoCombo/`)
|
||||
|
||||
Self-healing routing optimization:
|
||||
- 6-factor scoring, 4 mode packs, bandit exploration
|
||||
- Progressive cooldown, probe-based re-admission
|
||||
|
||||
### Dashboard (`src/app/(dashboard)/`)
|
||||
|
||||
| Page | Description |
|
||||
| ---------------------------- | -------------------------------------------------------------- |
|
||||
| `/dashboard` | Home with quick start, provider overview |
|
||||
| `/dashboard/endpoint` | **Endpoints** (tabbed): Endpoint Proxy, MCP, A2A, API Endpoints |
|
||||
| `/dashboard/providers` | Provider management and connections |
|
||||
| `/dashboard/combos` | Combo configurations with routing strategies |
|
||||
| `/dashboard/logs` | Request, Proxy, Audit, Console logs (tabbed) |
|
||||
| `/dashboard/analytics` | Usage analytics and evaluations |
|
||||
| `/dashboard/costs` | Cost tracking and breakdown |
|
||||
| `/dashboard/health` | Uptime, circuit breakers, latency |
|
||||
| `/dashboard/cli-tools` | CLI tool integrations (Claude, Codex, Antigravity, etc.) |
|
||||
| `/dashboard/media` | Image, Video, Music generation playground |
|
||||
| `/dashboard/settings` | System settings with multiple tabs |
|
||||
| `/dashboard/api-manager` | API key management with model permissions |
|
||||
|
||||
### OAuth & Tokens (`src/lib/oauth/`)
|
||||
|
||||
18 modules handling OAuth flows, token refresh, and provider credentials.
|
||||
@@ -76,7 +128,7 @@ overridable via env vars or `data/provider-credentials.json`.
|
||||
|
||||
- No hardcoded API keys or secrets in commits
|
||||
- Auth middleware on all API routes
|
||||
- Input validation on user-facing endpoints
|
||||
- Input validation on user-facing endpoints (Zod schemas)
|
||||
- SQLite encryption key must not be logged
|
||||
|
||||
### Architecture
|
||||
@@ -85,6 +137,7 @@ overridable via env vars or `data/provider-credentials.json`.
|
||||
- Provider requests flow through `open-sse/handlers/`
|
||||
- Translations use `open-sse/translator/` modules
|
||||
- `localDb.ts` is re-exports only — add new functions to the proper `db/*.ts` module
|
||||
- MCP and A2A pages are embedded as tabs inside `/dashboard/endpoint`, not standalone routes
|
||||
|
||||
### Code Quality
|
||||
|
||||
@@ -92,6 +145,7 @@ overridable via env vars or `data/provider-credentials.json`.
|
||||
- Proper HTTP status codes
|
||||
- No memory leaks in SSE streams (abort signals, cleanup)
|
||||
- Rate limit headers must be parsed correctly
|
||||
- All API inputs validated with Zod schemas
|
||||
|
||||
### Docker
|
||||
|
||||
|
||||
+1550
File diff suppressed because it is too large
Load Diff
+10
-3
@@ -2,6 +2,7 @@ FROM node:22-bookworm-slim AS builder
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
COPY scripts/postinstall.mjs ./scripts/postinstall.mjs
|
||||
RUN if [ -f package-lock.json ]; then npm ci --no-audit --no-fund; else npm install --no-audit --no-fund; fi
|
||||
|
||||
COPY . ./
|
||||
@@ -19,6 +20,7 @@ LABEL org.opencontainers.image.title="omniroute" \
|
||||
ENV NODE_ENV=production
|
||||
ENV PORT=20128
|
||||
ENV HOSTNAME=0.0.0.0
|
||||
ENV NODE_OPTIONS="--max-old-space-size=256"
|
||||
|
||||
# Data directory inside Docker — must match the volume mount in docker-compose.yml
|
||||
ENV DATA_DIR=/app/data
|
||||
@@ -27,13 +29,19 @@ RUN mkdir -p /app/data
|
||||
COPY --from=builder /app/public ./public
|
||||
COPY --from=builder /app/.next/static ./.next/static
|
||||
COPY --from=builder /app/.next/standalone ./
|
||||
# Explicitly copy @swc/helpers — not always traced by standalone output but needed at runtime
|
||||
COPY --from=builder /app/node_modules/@swc/helpers ./node_modules/@swc/helpers
|
||||
COPY --from=builder /app/scripts/run-standalone.mjs ./run-standalone.mjs
|
||||
COPY --from=builder /app/scripts/runtime-env.mjs ./runtime-env.mjs
|
||||
COPY --from=builder /app/scripts/bootstrap-env.mjs ./bootstrap-env.mjs
|
||||
COPY --from=builder /app/scripts/healthcheck.mjs ./healthcheck.mjs
|
||||
|
||||
EXPOSE 20128
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
|
||||
CMD node -e "fetch('http://127.0.0.1:20128/api/settings').then(r=>{if(!r.ok)throw r.status}).catch(()=>process.exit(1))"
|
||||
CMD ["node", "healthcheck.mjs"]
|
||||
|
||||
CMD ["node", "server.js"]
|
||||
CMD ["node", "run-standalone.mjs"]
|
||||
|
||||
FROM runner-base AS runner-cli
|
||||
|
||||
@@ -45,4 +53,3 @@ RUN apt-get update \
|
||||
|
||||
# Install CLI tools globally. Separate layer from apt for better cache reuse.
|
||||
RUN npm install -g --no-audit --no-fund @openai/codex @anthropic-ai/claude-code droid openclaw@latest
|
||||
|
||||
|
||||
-1000
File diff suppressed because it is too large
Load Diff
-999
@@ -1,999 +0,0 @@
|
||||
<div align="center">
|
||||
<img src="./docs/screenshots/MainOmniRoute.png" alt="OmniRoute Dashboard" width="800"/>
|
||||
|
||||
# 🚀 OmniRoute — La Passerelle IA Gratuite
|
||||
|
||||
### N'arrêtez jamais de coder. Routage intelligent vers des **modèles IA GRATUITS et économiques** avec fallback automatique.
|
||||
|
||||
_Votre proxy API universel — un endpoint, 36+ fournisseurs, zéro temps d'arrêt._
|
||||
|
||||
**Chat Completions • Embeddings • Génération d'images • Audio • Reranking • 100% TypeScript**
|
||||
|
||||
---
|
||||
|
||||
### 🤖 Fournisseur IA gratuit pour vos agents de programmation préférés
|
||||
|
||||
_Connectez n'importe quel IDE ou outil CLI alimenté par l'IA via OmniRoute — passerelle API gratuite pour un codage illimité._
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/cline/cline">
|
||||
<img src="./public/providers/openclaw.png" alt="OpenClaw" width="48"/><br/>
|
||||
<b>OpenClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 205K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/HKUDS/nanobot">
|
||||
<img src="./public/providers/nanobot.png" alt="NanoBot" width="48"/><br/>
|
||||
<b>NanoBot</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 20.9K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/sipeed/picoclaw">
|
||||
<img src="./public/providers/picoclaw.jpg" alt="PicoClaw" width="48"/><br/>
|
||||
<b>PicoClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 14.6K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/zeroclaw-labs/zeroclaw">
|
||||
<img src="./public/providers/zeroclaw.png" alt="ZeroClaw" width="48"/><br/>
|
||||
<b>ZeroClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 9.9K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/nearai/ironclaw">
|
||||
<img src="./public/providers/ironclaw.png" alt="IronClaw" width="48"/><br/>
|
||||
<b>IronClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 2.1K</sub>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/anomalyco/opencode">
|
||||
<img src="./public/providers/opencode.svg" alt="OpenCode" width="48"/><br/>
|
||||
<b>OpenCode</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 106K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/openai/codex">
|
||||
<img src="./public/providers/codex.png" alt="Codex CLI" width="48"/><br/>
|
||||
<b>Codex CLI</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 60.8K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/anthropics/claude-code">
|
||||
<img src="./public/providers/claude.png" alt="Claude Code" width="48"/><br/>
|
||||
<b>Claude Code</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 67.3K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/google-gemini/gemini-cli">
|
||||
<img src="./public/providers/gemini-cli.png" alt="Gemini CLI" width="48"/><br/>
|
||||
<b>Gemini CLI</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 94.7K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/Kilo-Org/kilocode">
|
||||
<img src="./public/providers/kilocode.png" alt="Kilo Code" width="48"/><br/>
|
||||
<b>Kilo Code</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 15.5K</sub>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<sub>📡 Tous les agents se connectent via <code>http://localhost:20128/v1</code> ou <code>http://cloud.omniroute.online/v1</code> — une configuration, modèles et quota illimités</sub>
|
||||
|
||||
---
|
||||
|
||||
[](https://www.npmjs.com/package/omniroute)
|
||||
[](https://hub.docker.com/r/diegosouzapw/omniroute)
|
||||
[](https://github.com/diegosouzapw/OmniRoute/blob/main/LICENSE)
|
||||
[](https://omniroute.online)
|
||||
[](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
|
||||
[🌐 Site web](https://omniroute.online) • [🚀 Démarrage rapide](#-démarrage-rapide) • [💡 Fonctionnalités](#-fonctionnalités-principales) • [📖 Docs](#-documentation) • [💰 Tarifs](#-aperçu-des-tarifs)
|
||||
|
||||
🌐 **Disponible en :** [English](README.md) | [Português](README.pt-BR.md) | [Español](README.es.md) | [Русский](README.ru.md) | [中文](README.zh-CN.md) | [Deutsch](README.de.md) | [Français](README.fr.md) | [Italiano](README.it.md)
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## 🤔 Pourquoi OmniRoute ?
|
||||
|
||||
**Arrêtez de gaspiller de l'argent et de vous heurter aux limites :**
|
||||
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> Le quota d'abonnement expire inutilisé chaque mois
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> Les limites de débit vous arrêtent en plein codage
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> APIs coûteuses (20-50 $/mois par fournisseur)
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> Changement manuel entre fournisseurs
|
||||
|
||||
**OmniRoute résout ces problèmes :**
|
||||
|
||||
- ✅ **Maximisez les abonnements** — Suivez les quotas, utilisez chaque bit avant la réinitialisation
|
||||
- ✅ **Fallback automatique** — Abonnement → Clé API → Économique → Gratuit, zéro temps d'arrêt
|
||||
- ✅ **Multi-comptes** — Round-robin entre les comptes par fournisseur
|
||||
- ✅ **Universel** — Fonctionne avec Claude Code, Codex, Gemini CLI, Cursor, Cline, OpenClaw, tout outil CLI
|
||||
|
||||
---
|
||||
|
||||
## 🔄 Comment ça fonctionne
|
||||
|
||||
```
|
||||
┌─────────────┐
|
||||
│ Votre CLI │ (Claude Code, Codex, Gemini CLI, OpenClaw, Cursor, Cline...)
|
||||
│ Tool │
|
||||
└──────┬──────┘
|
||||
│ http://localhost:20128/v1
|
||||
↓
|
||||
┌─────────────────────────────────────────┐
|
||||
│ OmniRoute (Routeur intelligent) │
|
||||
│ • Traduction de format (OpenAI ↔ Claude) │
|
||||
│ • Suivi des quotas + Embeddings + Images │
|
||||
│ • Renouvellement automatique des tokens │
|
||||
└──────┬──────────────────────────────────┘
|
||||
│
|
||||
├─→ [Tier 1: ABONNEMENT] Claude Code, Codex, Gemini CLI
|
||||
│ ↓ quota épuisé
|
||||
├─→ [Tier 2: CLÉ API] DeepSeek, Groq, xAI, Mistral, NVIDIA NIM, etc.
|
||||
│ ↓ limite de budget
|
||||
├─→ [Tier 3: ÉCONOMIQUE] GLM ($0.6/1M), MiniMax ($0.2/1M)
|
||||
│ ↓ limite de budget
|
||||
└─→ [Tier 4: GRATUIT] iFlow, Qwen, Kiro (illimité)
|
||||
|
||||
Résultat : Ne jamais arrêter de coder, coût minimal
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Démarrage rapide
|
||||
|
||||
**1. Installer globalement :**
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
omniroute
|
||||
```
|
||||
|
||||
🎉 Le tableau de bord s'ouvre sur `http://localhost:20128`
|
||||
|
||||
| Commande | Description |
|
||||
| ----------------------- | ------------------------------------------- |
|
||||
| `omniroute` | Démarrer le serveur (port par défaut 20128) |
|
||||
| `omniroute --port 3000` | Utiliser un port personnalisé |
|
||||
| `omniroute --no-open` | Ne pas ouvrir le navigateur automatiquement |
|
||||
| `omniroute --help` | Afficher l'aide |
|
||||
|
||||
**2. Connecter un fournisseur GRATUIT :**
|
||||
|
||||
Tableau de bord → Fournisseurs → Connecter **Claude Code** ou **Antigravity** → Connexion OAuth → Terminé !
|
||||
|
||||
**3. Utiliser dans votre outil CLI :**
|
||||
|
||||
```
|
||||
Claude Code/Codex/Gemini CLI/OpenClaw/Cursor/Cline Paramètres :
|
||||
Endpoint : http://localhost:20128/v1
|
||||
API Key : [copier depuis le tableau de bord]
|
||||
Model : if/kimi-k2-thinking
|
||||
```
|
||||
|
||||
**C'est tout !** Commencez à coder avec des modèles IA GRATUITS.
|
||||
|
||||
**Alternative — exécuter depuis le code source :**
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
npm install
|
||||
PORT=20128 NEXT_PUBLIC_BASE_URL=http://localhost:20128 npm run dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🐳 Docker
|
||||
|
||||
OmniRoute est disponible en tant qu'image Docker publique sur [Docker Hub](https://hub.docker.com/r/diegosouzapw/omniroute).
|
||||
|
||||
**Démarrage rapide :**
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
**Avec fichier d'environnement :**
|
||||
|
||||
```bash
|
||||
# Copier et modifier le .env d'abord
|
||||
cp .env.example .env
|
||||
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
--env-file .env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
**Avec Docker Compose :**
|
||||
|
||||
```bash
|
||||
# Profil de base (sans outils CLI)
|
||||
docker compose --profile base up -d
|
||||
|
||||
# Profil CLI (Claude Code, Codex, OpenClaw intégrés)
|
||||
docker compose --profile cli up -d
|
||||
```
|
||||
|
||||
| Image | Tag | Taille | Description |
|
||||
| ------------------------ | -------- | ------ | ----------------------- |
|
||||
| `diegosouzapw/omniroute` | `latest` | ~250MB | Dernière version stable |
|
||||
| `diegosouzapw/omniroute` | `1.0.6` | ~250MB | Version actuelle |
|
||||
|
||||
---
|
||||
|
||||
## 💰 Aperçu des tarifs
|
||||
|
||||
| Tier | Fournisseur | Coût | Réinitialisation | Idéal pour |
|
||||
| ----------------- | ----------------- | -------------------------- | ------------------- | ----------------------------- |
|
||||
| **💳 ABONNEMENT** | Claude Code (Pro) | 20 $/mois | 5h + hebdomadaire | Déjà abonné |
|
||||
| | Codex (Plus/Pro) | 20-200 $/mois | 5h + hebdomadaire | Utilisateurs OpenAI |
|
||||
| | Gemini CLI | **GRATUIT** | 180K/mois + 1K/jour | Tout le monde ! |
|
||||
| | GitHub Copilot | 10-19 $/mois | Mensuel | Utilisateurs GitHub |
|
||||
| **🔑 CLÉ API** | NVIDIA NIM | **GRATUIT** (1000 crédits) | Unique | Tests gratuits |
|
||||
| | DeepSeek | À l'usage | Aucune | Meilleur rapport qualité-prix |
|
||||
| | Groq | Niveau gratuit + payant | Limité | Inférence ultra-rapide |
|
||||
| | xAI (Grok) | À l'usage | Aucune | Modèles Grok |
|
||||
| | Mistral | Niveau gratuit + payant | Limité | IA européenne |
|
||||
| | OpenRouter | À l'usage | Aucune | 100+ modèles |
|
||||
| **💰 ÉCONOMIQUE** | GLM-4.7 | 0,6 $/1M | Quotidien 10h | Backup économique |
|
||||
| | MiniMax M2.1 | 0,2 $/1M | Rotatif 5h | Option la moins chère |
|
||||
| | Kimi K2 | 9 $/mois fixe | 10M tokens/mois | Coût prévisible |
|
||||
| **🆓 GRATUIT** | iFlow | 0 $ | Illimité | 8 modèles gratuits |
|
||||
| | Qwen | 0 $ | Illimité | 3 modèles gratuits |
|
||||
| | Kiro | 0 $ | Illimité | Claude gratuit |
|
||||
|
||||
**💡 Conseil Pro :** Commencez avec Gemini CLI (180K gratuits/mois) + iFlow (illimité gratuit) = 0 $ de coût !
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Cas d'utilisation
|
||||
|
||||
### Cas 1 : « J'ai un abonnement Claude Pro »
|
||||
|
||||
**Problème :** Le quota expire inutilisé, limites de débit pendant le codage intensif
|
||||
|
||||
```
|
||||
Combo : "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (utiliser l'abonnement au maximum)
|
||||
2. glm/glm-4.7 (backup économique quand le quota est épuisé)
|
||||
3. if/kimi-k2-thinking (fallback d'urgence gratuit)
|
||||
|
||||
Coût mensuel : 20 $ (abonnement) + ~5 $ (backup) = 25 $ au total
|
||||
vs. 20 $ + atteindre les limites = frustration
|
||||
```
|
||||
|
||||
### Cas 2 : « Je veux zéro coût »
|
||||
|
||||
**Problème :** Impossible de payer des abonnements, besoin d'IA fiable pour coder
|
||||
|
||||
```
|
||||
Combo : "free-forever"
|
||||
1. gc/gemini-3-flash (180K gratuits/mois)
|
||||
2. if/kimi-k2-thinking (illimité gratuit)
|
||||
3. qw/qwen3-coder-plus (illimité gratuit)
|
||||
|
||||
Coût mensuel : 0 $
|
||||
Qualité : Modèles prêts pour la production
|
||||
```
|
||||
|
||||
### Cas 3 : « Je dois coder 24/7, sans interruption »
|
||||
|
||||
**Problème :** Délais serrés, ne peut pas se permettre de temps d'arrêt
|
||||
|
||||
```
|
||||
Combo : "always-on"
|
||||
1. cc/claude-opus-4-6 (meilleure qualité)
|
||||
2. cx/gpt-5.2-codex (deuxième abonnement)
|
||||
3. glm/glm-4.7 (économique, reset quotidien)
|
||||
4. minimax/MiniMax-M2.1 (le moins cher, reset 5h)
|
||||
5. if/kimi-k2-thinking (gratuit illimité)
|
||||
|
||||
Résultat : 5 niveaux de fallback = zéro temps d'arrêt
|
||||
```
|
||||
|
||||
### Cas 4 : « Je veux l'IA GRATUITE dans OpenClaw »
|
||||
|
||||
**Problème :** Besoin d'assistant IA dans les apps de messagerie, entièrement gratuit
|
||||
|
||||
```
|
||||
Combo : "openclaw-free"
|
||||
1. if/glm-4.7 (illimité gratuit)
|
||||
2. if/minimax-m2.1 (illimité gratuit)
|
||||
3. if/kimi-k2-thinking (illimité gratuit)
|
||||
|
||||
Coût mensuel : 0 $
|
||||
Accès via : WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 💡 Fonctionnalités principales
|
||||
|
||||
### 🧠 Routage & Intelligence
|
||||
|
||||
| Fonctionnalité | Ce qu'elle fait |
|
||||
| ------------------------------------- | ------------------------------------------------------------------------------- |
|
||||
| 🎯 **Fallback intelligent 4 niveaux** | Auto-routage : Abonnement → Clé API → Économique → Gratuit |
|
||||
| 📊 **Suivi des quotas en temps réel** | Comptage de tokens en direct + compte à rebours de réinitialisation |
|
||||
| 🔄 **Traduction de format** | OpenAI ↔ Claude ↔ Gemini ↔ Cursor ↔ Kiro transparent |
|
||||
| 👥 **Support multi-comptes** | Plusieurs comptes par fournisseur avec sélection intelligente |
|
||||
| 🔄 **Renouvellement auto des tokens** | Les tokens OAuth se renouvellent automatiquement avec retry |
|
||||
| 🎨 **Combos personnalisés** | 6 stratégies : fill-first, round-robin, p2c, random, least-used, cost-optimized |
|
||||
| 🧩 **Modèles personnalisés** | Ajoutez n'importe quel ID de modèle à n'importe quel fournisseur |
|
||||
| 🌐 **Routeur wildcard** | Routez les patterns `provider/*` vers n'importe quel fournisseur dynamiquement |
|
||||
| 🧠 **Budget de raisonnement** | Modes passthrough, auto, custom et adaptive pour les modèles de raisonnement |
|
||||
| 💬 **Injection System Prompt** | System prompt global appliqué à toutes les requêtes |
|
||||
| 📄 **API Responses** | Support complet de l'API Responses d'OpenAI (`/v1/responses`) pour Codex |
|
||||
|
||||
### 🎵 APIs multi-modales
|
||||
|
||||
| Fonctionnalité | Ce qu'elle fait |
|
||||
| -------------------------- | ------------------------------------------------------- |
|
||||
| 🖼️ **Génération d'images** | `/v1/images/generations` — 4 fournisseurs, 9+ modèles |
|
||||
| 📐 **Embeddings** | `/v1/embeddings` — 6 fournisseurs, 9+ modèles |
|
||||
| 🎤 **Transcription audio** | `/v1/audio/transcriptions` — compatible Whisper |
|
||||
| 🔊 **Texte vers parole** | `/v1/audio/speech` — synthèse audio multi-fournisseur |
|
||||
| 🛡️ **Modérations** | `/v1/moderations` — vérifications de sécurité |
|
||||
| 🔀 **Reranking** | `/v1/rerank` — reclassement de pertinence des documents |
|
||||
|
||||
### 🛡️ Résilience & Sécurité
|
||||
|
||||
| Fonctionnalité | Ce qu'elle fait |
|
||||
| ------------------------------- | -------------------------------------------------------------------- |
|
||||
| 🔌 **Circuit Breaker** | Ouverture/fermeture auto par fournisseur avec seuils configurables |
|
||||
| 🛡️ **Anti-Thundering Herd** | Mutex + sémaphore de rate-limit pour les fournisseurs avec clé API |
|
||||
| 🧠 **Cache sémantique** | Cache à deux niveaux (signature + sémantique) réduit coût et latence |
|
||||
| ⚡ **Idempotence des requêtes** | Fenêtre de dédup 5s pour les requêtes dupliquées |
|
||||
| 🔒 **Spoofing TLS Fingerprint** | Contournement de détection de bot via wreq-js |
|
||||
| 🌐 **Filtrage IP** | Allowlist/blocklist pour le contrôle d'accès API |
|
||||
| 📊 **Rate limits éditables** | RPM configurable, intervalle minimum, concurrence max |
|
||||
|
||||
### 📊 Observabilité & Analytique
|
||||
|
||||
| Fonctionnalité | Ce qu'elle fait |
|
||||
| --------------------------------- | ------------------------------------------------------------------------- |
|
||||
| 📝 **Logs de requêtes** | Mode debug avec logs complets requête/réponse |
|
||||
| 💾 **Logs SQLite** | Logs proxy persistants survivant aux redémarrages |
|
||||
| 📊 **Tableau de bord analytique** | Recharts : cartes de stats, graphique d'utilisation, tableau fournisseurs |
|
||||
| 📈 **Suivi de progression** | Événements SSE de progression opt-in pour le streaming |
|
||||
| 🧪 **Évaluations LLM** | Tests avec golden set et 4 stratégies de correspondance |
|
||||
| 🔍 **Télémétrie des requêtes** | Agrégation de latence p50/p95/p99 + traçage X-Request-Id |
|
||||
| 📋 **Logs + Quotas** | Pages dédiées pour navigation des logs et suivi des quotas |
|
||||
| 🏥 **Tableau de bord santé** | Uptime, états circuit breaker, lockouts, stats cache |
|
||||
| 💰 **Suivi des coûts** | Gestion de budget + configuration des prix par modèle |
|
||||
|
||||
### ☁️ Déploiement & Synchronisation
|
||||
|
||||
| Fonctionnalité | Ce qu'elle fait |
|
||||
| --------------------------------- | ------------------------------------------------------------------------------- |
|
||||
| 💾 **Cloud Sync** | Synchroniser les paramètres entre appareils via Cloudflare Workers |
|
||||
| 🌐 **Déployer partout** | Localhost, VPS, Docker, Cloudflare Workers |
|
||||
| 🔑 **Gestion des clés API** | Générer, faire tourner et limiter les clés API par fournisseur |
|
||||
| 🧙 **Assistant de configuration** | Setup guidé en 4 étapes pour les nouveaux utilisateurs |
|
||||
| 🔧 **Tableau de bord CLI Tools** | Configuration en un clic pour Claude, Codex, Cline, OpenClaw, Kilo, Antigravity |
|
||||
| 🔄 **Sauvegardes DB** | Sauvegarde et restauration automatiques de tous les paramètres |
|
||||
|
||||
<details>
|
||||
<summary><b>📖 Détails des fonctionnalités</b></summary>
|
||||
|
||||
### 🎯 Fallback intelligent 4 niveaux
|
||||
|
||||
Créez des combos avec fallback automatique :
|
||||
|
||||
```
|
||||
Combo : "my-coding-stack"
|
||||
1. cc/claude-opus-4-6 (votre abonnement)
|
||||
2. nvidia/llama-3.3-70b (API NVIDIA gratuite)
|
||||
3. glm/glm-4.7 (backup économique, $0.6/1M)
|
||||
4. if/kimi-k2-thinking (fallback gratuit)
|
||||
|
||||
→ Bascule automatiquement lorsque le quota est épuisé ou en cas d'erreurs
|
||||
```
|
||||
|
||||
### 📊 Suivi des quotas en temps réel
|
||||
|
||||
- Consommation de tokens par fournisseur
|
||||
- Compte à rebours de réinitialisation (5 heures, quotidien, hebdomadaire)
|
||||
- Estimation des coûts pour les niveaux payants
|
||||
- Rapports de dépenses mensuels
|
||||
|
||||
### 🔄 Traduction de format
|
||||
|
||||
Traduction transparente entre les formats :
|
||||
|
||||
- **OpenAI** ↔ **Claude** ↔ **Gemini** ↔ **OpenAI Responses**
|
||||
- Votre CLI envoie le format OpenAI → OmniRoute traduit → Le fournisseur reçoit le format natif
|
||||
- Fonctionne avec tout outil supportant les endpoints OpenAI personnalisés
|
||||
|
||||
### 👥 Support multi-comptes
|
||||
|
||||
- Ajouter plusieurs comptes par fournisseur
|
||||
- Round-robin automatique ou routage par priorité
|
||||
- Basculement vers le compte suivant lorsqu'un quota est atteint
|
||||
|
||||
### 🔄 Renouvellement automatique des tokens
|
||||
|
||||
- Les tokens OAuth se renouvellent automatiquement avant expiration
|
||||
- Pas de réauthentification manuelle nécessaire
|
||||
- Expérience transparente sur tous les fournisseurs
|
||||
|
||||
### 🎨 Combos personnalisés
|
||||
|
||||
- Créer des combinaisons de modèles illimitées
|
||||
- 6 stratégies : fill-first, round-robin, power-of-two-choices, random, least-used, cost-optimized
|
||||
- Partager les combos entre appareils avec Cloud Sync
|
||||
|
||||
### 🏥 Tableau de bord santé
|
||||
|
||||
- Statut du système (uptime, version, utilisation mémoire)
|
||||
- États des circuit breakers par fournisseur (Closed/Open/Half-Open)
|
||||
- Statut des rate limits et lockouts actifs
|
||||
- Statistiques du cache de signatures
|
||||
- Télémétrie de latence (p50/p95/p99) + cache de prompt
|
||||
- Réinitialisation de la santé en un clic
|
||||
|
||||
### 🔧 Playground du traducteur
|
||||
|
||||
- Déboguer, tester et visualiser les traductions de format d'API
|
||||
- Envoyer des requêtes et voir comment OmniRoute traduit entre les formats des fournisseurs
|
||||
- Inestimable pour résoudre les problèmes d'intégration
|
||||
|
||||
### 💾 Cloud Sync
|
||||
|
||||
- Synchroniser fournisseurs, combos et paramètres entre appareils
|
||||
- Synchronisation en arrière-plan automatique
|
||||
- Stockage chiffré sécurisé
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 📖 Guide de configuration
|
||||
|
||||
<details>
|
||||
<summary><b>💳 Fournisseurs par abonnement</b></summary>
|
||||
|
||||
### Claude Code (Pro/Max)
|
||||
|
||||
```bash
|
||||
Tableau de bord → Fournisseurs → Connecter Claude Code
|
||||
→ Connexion OAuth → Renouvellement auto des tokens
|
||||
→ Suivi de quota 5h + hebdomadaire
|
||||
|
||||
Modèles :
|
||||
cc/claude-opus-4-6
|
||||
cc/claude-sonnet-4-5-20250929
|
||||
cc/claude-haiku-4-5-20251001
|
||||
```
|
||||
|
||||
**Conseil Pro :** Utilisez Opus pour les tâches complexes, Sonnet pour la vitesse. OmniRoute suit les quotas par modèle !
|
||||
|
||||
### OpenAI Codex (Plus/Pro)
|
||||
|
||||
```bash
|
||||
Tableau de bord → Fournisseurs → Connecter Codex
|
||||
→ Connexion OAuth (port 1455)
|
||||
→ Reset 5h + hebdomadaire
|
||||
|
||||
Modèles :
|
||||
cx/gpt-5.2-codex
|
||||
cx/gpt-5.1-codex-max
|
||||
```
|
||||
|
||||
### Gemini CLI (GRATUIT 180K/mois !)
|
||||
|
||||
```bash
|
||||
Tableau de bord → Fournisseurs → Connecter Gemini CLI
|
||||
→ Google OAuth
|
||||
→ 180K completions/mois + 1K/jour
|
||||
|
||||
Modèles :
|
||||
gc/gemini-3-flash-preview
|
||||
gc/gemini-2.5-pro
|
||||
```
|
||||
|
||||
**Meilleure valeur :** Niveau gratuit énorme ! Utilisez avant les niveaux payants.
|
||||
|
||||
### GitHub Copilot
|
||||
|
||||
```bash
|
||||
Tableau de bord → Fournisseurs → Connecter GitHub
|
||||
→ OAuth via GitHub
|
||||
→ Reset mensuel (1er du mois)
|
||||
|
||||
Modèles :
|
||||
gh/gpt-5
|
||||
gh/claude-4.5-sonnet
|
||||
gh/gemini-3-pro
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔑 Fournisseurs par clé API</b></summary>
|
||||
|
||||
### NVIDIA NIM (GRATUIT 1000 crédits !)
|
||||
|
||||
1. S'inscrire : [build.nvidia.com](https://build.nvidia.com)
|
||||
2. Obtenir une clé API gratuite (1000 crédits d'inférence inclus)
|
||||
3. Tableau de bord → Ajouter fournisseur → NVIDIA NIM :
|
||||
- API Key : `nvapi-your-key`
|
||||
|
||||
**Modèles :** `nvidia/llama-3.3-70b-instruct`, `nvidia/mistral-7b-instruct` et 50+ autres
|
||||
|
||||
**Conseil Pro :** API compatible OpenAI — fonctionne parfaitement avec la traduction de format d'OmniRoute !
|
||||
|
||||
### DeepSeek
|
||||
|
||||
1. S'inscrire : [platform.deepseek.com](https://platform.deepseek.com)
|
||||
2. Obtenir une clé API
|
||||
3. Tableau de bord → Ajouter fournisseur → DeepSeek
|
||||
|
||||
**Modèles :** `deepseek/deepseek-chat`, `deepseek/deepseek-coder`
|
||||
|
||||
### Groq (Niveau gratuit disponible !)
|
||||
|
||||
1. S'inscrire : [console.groq.com](https://console.groq.com)
|
||||
2. Obtenir une clé API (niveau gratuit inclus)
|
||||
3. Tableau de bord → Ajouter fournisseur → Groq
|
||||
|
||||
**Modèles :** `groq/llama-3.3-70b`, `groq/mixtral-8x7b`
|
||||
|
||||
**Conseil Pro :** Inférence ultra-rapide — idéal pour le codage en temps réel !
|
||||
|
||||
### OpenRouter (100+ modèles)
|
||||
|
||||
1. S'inscrire : [openrouter.ai](https://openrouter.ai)
|
||||
2. Obtenir une clé API
|
||||
3. Tableau de bord → Ajouter fournisseur → OpenRouter
|
||||
|
||||
**Modèles :** Accès à 100+ modèles de tous les grands fournisseurs via une seule clé API.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>💰 Fournisseurs économiques (Backup)</b></summary>
|
||||
|
||||
### GLM-4.7 (Reset quotidien, $0.6/1M)
|
||||
|
||||
1. S'inscrire : [Zhipu AI](https://open.bigmodel.cn/)
|
||||
2. Obtenir une clé API du Coding Plan
|
||||
3. Tableau de bord → Ajouter clé API :
|
||||
- Fournisseur : `glm`
|
||||
- API Key : `your-key`
|
||||
|
||||
**Utilisez :** `glm/glm-4.7`
|
||||
|
||||
**Conseil Pro :** Le Coding Plan offre 3× le quota à 1/7 du coût ! Reset quotidien à 10h.
|
||||
|
||||
### MiniMax M2.1 (Reset 5h, $0.20/1M)
|
||||
|
||||
1. S'inscrire : [MiniMax](https://www.minimax.io/)
|
||||
2. Obtenir une clé API
|
||||
3. Tableau de bord → Ajouter clé API
|
||||
|
||||
**Utilisez :** `minimax/MiniMax-M2.1`
|
||||
|
||||
**Conseil Pro :** L'option la moins chère pour le contexte long (1M tokens) !
|
||||
|
||||
### Kimi K2 (9 $/mois fixe)
|
||||
|
||||
1. S'abonner : [Moonshot AI](https://platform.moonshot.ai/)
|
||||
2. Obtenir une clé API
|
||||
3. Tableau de bord → Ajouter clé API
|
||||
|
||||
**Utilisez :** `kimi/kimi-latest`
|
||||
|
||||
**Conseil Pro :** 9 $/mois fixe pour 10M tokens = 0,90 $/1M de coût effectif !
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🆓 Fournisseurs GRATUITS (Backup d'urgence)</b></summary>
|
||||
|
||||
### iFlow (8 modèles GRATUITS)
|
||||
|
||||
```bash
|
||||
Tableau de bord → Connecter iFlow
|
||||
→ Connexion OAuth iFlow
|
||||
→ Utilisation illimitée
|
||||
|
||||
Modèles :
|
||||
if/kimi-k2-thinking
|
||||
if/qwen3-coder-plus
|
||||
if/glm-4.7
|
||||
if/minimax-m2
|
||||
if/deepseek-r1
|
||||
```
|
||||
|
||||
### Qwen (3 modèles GRATUITS)
|
||||
|
||||
```bash
|
||||
Tableau de bord → Connecter Qwen
|
||||
→ Autorisation par code d'appareil
|
||||
→ Utilisation illimitée
|
||||
|
||||
Modèles :
|
||||
qw/qwen3-coder-plus
|
||||
qw/qwen3-coder-flash
|
||||
```
|
||||
|
||||
### Kiro (Claude GRATUIT)
|
||||
|
||||
```bash
|
||||
Tableau de bord → Connecter Kiro
|
||||
→ AWS Builder ID ou Google/GitHub
|
||||
→ Utilisation illimitée
|
||||
|
||||
Modèles :
|
||||
kr/claude-sonnet-4.5
|
||||
kr/claude-haiku-4.5
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🎨 Créer des combos</b></summary>
|
||||
|
||||
### Exemple 1 : Maximiser l'abonnement → Backup économique
|
||||
|
||||
```
|
||||
Tableau de bord → Combos → Créer nouveau
|
||||
|
||||
Nom : premium-coding
|
||||
Modèles :
|
||||
1. cc/claude-opus-4-6 (Abonnement principal)
|
||||
2. glm/glm-4.7 (Backup économique, $0.6/1M)
|
||||
3. minimax/MiniMax-M2.1 (Fallback le moins cher, $0.20/1M)
|
||||
|
||||
Utilisez en CLI : premium-coding
|
||||
```
|
||||
|
||||
### Exemple 2 : Gratuit uniquement (Zéro coût)
|
||||
|
||||
```
|
||||
Nom : free-combo
|
||||
Modèles :
|
||||
1. gc/gemini-3-flash-preview (180K gratuits/mois)
|
||||
2. if/kimi-k2-thinking (illimité)
|
||||
3. qw/qwen3-coder-plus (illimité)
|
||||
|
||||
Coût : 0 $ pour toujours !
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔧 Intégration CLI</b></summary>
|
||||
|
||||
### Cursor IDE
|
||||
|
||||
```
|
||||
Paramètres → Modèles → Avancé :
|
||||
OpenAI API Base URL : http://localhost:20128/v1
|
||||
OpenAI API Key : [du tableau de bord OmniRoute]
|
||||
Model : cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
Utilisez la page **CLI Tools** dans le tableau de bord pour la configuration en un clic, ou modifiez `~/.claude/settings.json` manuellement.
|
||||
|
||||
### Codex CLI
|
||||
|
||||
```bash
|
||||
export OPENAI_BASE_URL="http://localhost:20128"
|
||||
export OPENAI_API_KEY="your-omniroute-api-key"
|
||||
|
||||
codex "your prompt"
|
||||
```
|
||||
|
||||
### OpenClaw
|
||||
|
||||
**Option 1 — Tableau de bord (recommandé) :**
|
||||
|
||||
```
|
||||
Tableau de bord → CLI Tools → OpenClaw → Sélectionner modèle → Appliquer
|
||||
```
|
||||
|
||||
**Option 2 — Manuel :** Modifier `~/.openclaw/openclaw.json` :
|
||||
|
||||
```json
|
||||
{
|
||||
"models": {
|
||||
"providers": {
|
||||
"omniroute": {
|
||||
"baseUrl": "http://127.0.0.1:20128/v1",
|
||||
"apiKey": "sk_omniroute",
|
||||
"api": "openai-completions"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> **Note :** OpenClaw fonctionne uniquement avec OmniRoute local. Utilisez `127.0.0.1` au lieu de `localhost` pour éviter les problèmes de résolution IPv6.
|
||||
|
||||
### Cline / Continue / RooCode
|
||||
|
||||
```
|
||||
Paramètres → Configuration API :
|
||||
Fournisseur : OpenAI Compatible
|
||||
Base URL : http://localhost:20128/v1
|
||||
API Key : [du tableau de bord OmniRoute]
|
||||
Model : if/kimi-k2-thinking
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 📊 Modèles disponibles
|
||||
|
||||
<details>
|
||||
<summary><b>Voir tous les modèles disponibles</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** - Pro/Max :
|
||||
|
||||
- `cc/claude-opus-4-6`
|
||||
- `cc/claude-sonnet-4-5-20250929`
|
||||
- `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** - Plus/Pro :
|
||||
|
||||
- `cx/gpt-5.2-codex`
|
||||
- `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** - GRATUIT :
|
||||
|
||||
- `gc/gemini-3-flash-preview`
|
||||
- `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)** :
|
||||
|
||||
- `gh/gpt-5`
|
||||
- `gh/claude-4.5-sonnet`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)** - Crédits GRATUITS :
|
||||
|
||||
- `nvidia/llama-3.3-70b-instruct`
|
||||
- `nvidia/mistral-7b-instruct`
|
||||
- 50+ modèles sur [build.nvidia.com](https://build.nvidia.com)
|
||||
|
||||
**GLM (`glm/`)** - $0.6/1M :
|
||||
|
||||
- `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** - $0.2/1M :
|
||||
|
||||
- `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** - GRATUIT :
|
||||
|
||||
- `if/kimi-k2-thinking`
|
||||
- `if/qwen3-coder-plus`
|
||||
- `if/deepseek-r1`
|
||||
- `if/glm-4.7`
|
||||
- `if/minimax-m2`
|
||||
|
||||
**Qwen (`qw/`)** - GRATUIT :
|
||||
|
||||
- `qw/qwen3-coder-plus`
|
||||
- `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** - GRATUIT :
|
||||
|
||||
- `kr/claude-sonnet-4.5`
|
||||
- `kr/claude-haiku-4.5`
|
||||
|
||||
**OpenRouter (`or/`)** - 100+ modèles :
|
||||
|
||||
- `or/anthropic/claude-4-sonnet`
|
||||
- `or/google/gemini-2.5-pro`
|
||||
- Tout modèle de [openrouter.ai/models](https://openrouter.ai/models)
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Évaluations (Evals)
|
||||
|
||||
OmniRoute inclut un framework d'évaluation intégré pour tester la qualité des réponses LLM contre un golden set. Accès via **Analytics → Evals** dans le tableau de bord.
|
||||
|
||||
### Golden Set intégré
|
||||
|
||||
Le « OmniRoute Golden Set » préchargé contient 10 cas de test :
|
||||
|
||||
- Salutations, mathématiques, géographie, génération de code
|
||||
- Conformité format JSON, traduction, markdown
|
||||
- Rejet de sécurité (contenu nocif), comptage, logique booléenne
|
||||
|
||||
### Stratégies d'évaluation
|
||||
|
||||
| Stratégie | Description | Exemple |
|
||||
| ---------- | -------------------------------------------------------------- | -------------------------------- |
|
||||
| `exact` | La sortie doit correspondre exactement | `"4"` |
|
||||
| `contains` | La sortie doit contenir la sous-chaîne (insensible à la casse) | `"Paris"` |
|
||||
| `regex` | La sortie doit correspondre au motif regex | `"1.*2.*3"` |
|
||||
| `custom` | Fonction JS personnalisée retourne true/false | `(output) => output.length > 10` |
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Dépannage
|
||||
|
||||
<details>
|
||||
<summary><b>Cliquez pour développer le guide de dépannage</b></summary>
|
||||
|
||||
**« Language model did not provide messages »**
|
||||
|
||||
- Quota du fournisseur épuisé → Vérifiez le suivi de quota dans le tableau de bord
|
||||
- Solution : Utilisez un combo avec fallback ou passez à un niveau moins cher
|
||||
|
||||
**Rate limiting**
|
||||
|
||||
- Quota d'abonnement épuisé → Fallback vers GLM/MiniMax
|
||||
- Ajoutez un combo : `cc/claude-opus-4-6 → glm/glm-4.7 → if/kimi-k2-thinking`
|
||||
|
||||
**Token OAuth expiré**
|
||||
|
||||
- Renouvelé automatiquement par OmniRoute
|
||||
- Si le problème persiste : Tableau de bord → Fournisseur → Reconnecter
|
||||
|
||||
**Coûts élevés**
|
||||
|
||||
- Vérifiez les statistiques d'utilisation dans Tableau de bord → Coûts
|
||||
- Changez le modèle principal pour GLM/MiniMax
|
||||
- Utilisez le niveau gratuit (Gemini CLI, iFlow) pour les tâches non critiques
|
||||
|
||||
**Le tableau de bord s'ouvre sur le mauvais port**
|
||||
|
||||
- Définissez `PORT=20128` et `NEXT_PUBLIC_BASE_URL=http://localhost:20128`
|
||||
|
||||
**Erreurs de cloud sync**
|
||||
|
||||
- Vérifiez que `BASE_URL` pointe vers votre instance en cours d'exécution
|
||||
- Vérifiez que `CLOUD_URL` pointe vers le point de terminaison cloud attendu
|
||||
- Gardez les valeurs `NEXT_PUBLIC_*` alignées avec les valeurs du serveur
|
||||
|
||||
**Le premier login ne fonctionne pas**
|
||||
|
||||
- Vérifiez `INITIAL_PASSWORD` dans `.env`
|
||||
- Si non défini, le mot de passe par défaut est `123456`
|
||||
|
||||
**Pas de logs de requêtes**
|
||||
|
||||
- Définissez `ENABLE_REQUEST_LOGS=true` dans `.env`
|
||||
|
||||
**Le test de connexion affiche « Invalid » pour les fournisseurs compatibles OpenAI**
|
||||
|
||||
- Beaucoup de fournisseurs n'exposent pas le point de terminaison `/models`
|
||||
- OmniRoute v1.0.6+ inclut une validation de secours via chat completions
|
||||
- Assurez-vous que l'URL de base inclut le suffixe `/v1`
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Stack technologique
|
||||
|
||||
- **Runtime** : Node.js 20+
|
||||
- **Langage** : TypeScript 5.9 — **100% TypeScript** dans `src/` et `open-sse/` (v1.0.6)
|
||||
- **Framework** : Next.js 16 + React 19 + Tailwind CSS 4
|
||||
- **Base de données** : LowDB (JSON) + SQLite (état du domaine + logs proxy)
|
||||
- **Streaming** : Server-Sent Events (SSE)
|
||||
- **Auth** : OAuth 2.0 (PKCE) + JWT + API Keys
|
||||
- **Tests** : Node.js test runner (368+ tests unitaires)
|
||||
- **CI/CD** : GitHub Actions (publication automatique npm + Docker Hub lors du release)
|
||||
- **Site web** : [omniroute.online](https://omniroute.online)
|
||||
- **Package** : [npmjs.com/package/omniroute](https://www.npmjs.com/package/omniroute)
|
||||
- **Docker** : [hub.docker.com/r/diegosouzapw/omniroute](https://hub.docker.com/r/diegosouzapw/omniroute)
|
||||
- **Résilience** : Circuit breaker, backoff exponentiel, anti-thundering herd, spoofing TLS
|
||||
|
||||
---
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
| Document | Description |
|
||||
| ------------------------------------------ | --------------------------------------------------- |
|
||||
| [Guide utilisateur](docs/USER_GUIDE.md) | Fournisseurs, combos, intégration CLI, déploiement |
|
||||
| [Référence API](docs/API_REFERENCE.md) | Tous les endpoints avec exemples |
|
||||
| [Dépannage](docs/TROUBLESHOOTING.md) | Problèmes courants et solutions |
|
||||
| [Architecture](docs/ARCHITECTURE.md) | Architecture système et détails internes |
|
||||
| [Contribuer](CONTRIBUTING.md) | Configuration de développement et directives |
|
||||
| [Spécification OpenAPI](docs/openapi.yaml) | Spécification OpenAPI 3.0 |
|
||||
| [Politique de sécurité](SECURITY.md) | Signalement de vulnérabilités et pratiques sécurité |
|
||||
|
||||
---
|
||||
|
||||
## 📧 Support
|
||||
|
||||
> 💬 **Rejoignez notre communauté !** [Groupe WhatsApp](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t) — Obtenez de l'aide, partagez des astuces et restez informé.
|
||||
|
||||
- **Site web** : [omniroute.online](https://omniroute.online)
|
||||
- **GitHub** : [github.com/diegosouzapw/OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
||||
- **Issues** : [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **WhatsApp** : [Groupe communautaire](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
- **Projet original** : [9router par decolua](https://github.com/decolua/9router)
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributeurs
|
||||
|
||||
[](https://github.com/diegosouzapw/OmniRoute/graphs/contributors)
|
||||
|
||||
### Comment contribuer
|
||||
|
||||
1. Forkez le dépôt
|
||||
2. Créez votre branche de fonctionnalité (`git checkout -b feature/amazing-feature`)
|
||||
3. Committez vos changements (`git commit -m 'Add amazing feature'`)
|
||||
4. Poussez vers la branche (`git push origin feature/amazing-feature`)
|
||||
5. Ouvrez une Pull Request
|
||||
|
||||
Consultez [CONTRIBUTING.md](CONTRIBUTING.md) pour les directives détaillées.
|
||||
|
||||
### Publier une nouvelle version
|
||||
|
||||
```bash
|
||||
# Créer un release — la publication npm est automatique
|
||||
gh release create v1.0.6 --title "v1.0.6" --generate-notes
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 Historique des Stars
|
||||
|
||||
<a href="https://star-history.com/#diegosouzapw/OmniRoute&Date">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
---
|
||||
|
||||
## 🙏 Remerciements
|
||||
|
||||
Remerciements spéciaux à **[9router](https://github.com/decolua/9router)** par **[decolua](https://github.com/decolua)** — le projet original qui a inspiré ce fork. OmniRoute construit sur cette base incroyable avec des fonctionnalités supplémentaires, des APIs multi-modales et une réécriture complète en TypeScript.
|
||||
|
||||
Remerciements spéciaux à **[CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI)** — l'implémentation originale en Go qui a inspiré ce portage en JavaScript.
|
||||
|
||||
---
|
||||
|
||||
## 📄 Licence
|
||||
|
||||
Licence MIT — voir [LICENSE](LICENSE) pour les détails.
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<sub>Fait avec ❤️ pour les développeurs qui codent 24/7</sub>
|
||||
<br/>
|
||||
<sub><a href="https://omniroute.online">omniroute.online</a></sub>
|
||||
</div>
|
||||
-999
@@ -1,999 +0,0 @@
|
||||
<div align="center">
|
||||
<img src="./docs/screenshots/MainOmniRoute.png" alt="OmniRoute Dashboard" width="800"/>
|
||||
|
||||
# 🚀 OmniRoute — Бесплатный AI Gateway
|
||||
|
||||
### Никогда не прекращайте программировать. Умная маршрутизация к **БЕСПЛАТНЫМ и дешёвым AI-моделям** с автоматическим fallback.
|
||||
|
||||
_Ваш универсальный API-прокси — одна точка доступа, 36+ провайдеров, нулевой простой._
|
||||
|
||||
**Chat Completions • Embeddings • Генерация изображений • Аудио • Reranking • 100% TypeScript**
|
||||
|
||||
---
|
||||
|
||||
### 🤖 Бесплатный AI-провайдер для ваших любимых агентов программирования
|
||||
|
||||
_Подключайте любую IDE или CLI-инструмент с AI через OmniRoute — бесплатный API gateway для неограниченного программирования._
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/cline/cline">
|
||||
<img src="./public/providers/openclaw.png" alt="OpenClaw" width="48"/><br/>
|
||||
<b>OpenClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 205K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/HKUDS/nanobot">
|
||||
<img src="./public/providers/nanobot.png" alt="NanoBot" width="48"/><br/>
|
||||
<b>NanoBot</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 20.9K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/sipeed/picoclaw">
|
||||
<img src="./public/providers/picoclaw.jpg" alt="PicoClaw" width="48"/><br/>
|
||||
<b>PicoClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 14.6K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/zeroclaw-labs/zeroclaw">
|
||||
<img src="./public/providers/zeroclaw.png" alt="ZeroClaw" width="48"/><br/>
|
||||
<b>ZeroClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 9.9K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/nearai/ironclaw">
|
||||
<img src="./public/providers/ironclaw.png" alt="IronClaw" width="48"/><br/>
|
||||
<b>IronClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 2.1K</sub>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/anomalyco/opencode">
|
||||
<img src="./public/providers/opencode.svg" alt="OpenCode" width="48"/><br/>
|
||||
<b>OpenCode</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 106K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/openai/codex">
|
||||
<img src="./public/providers/codex.png" alt="Codex CLI" width="48"/><br/>
|
||||
<b>Codex CLI</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 60.8K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/anthropics/claude-code">
|
||||
<img src="./public/providers/claude.png" alt="Claude Code" width="48"/><br/>
|
||||
<b>Claude Code</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 67.3K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/google-gemini/gemini-cli">
|
||||
<img src="./public/providers/gemini-cli.png" alt="Gemini CLI" width="48"/><br/>
|
||||
<b>Gemini CLI</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 94.7K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/Kilo-Org/kilocode">
|
||||
<img src="./public/providers/kilocode.png" alt="Kilo Code" width="48"/><br/>
|
||||
<b>Kilo Code</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 15.5K</sub>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<sub>📡 Все агенты подключаются через <code>http://localhost:20128/v1</code> или <code>http://cloud.omniroute.online/v1</code> — одна конфигурация, неограниченные модели и квота</sub>
|
||||
|
||||
---
|
||||
|
||||
[](https://www.npmjs.com/package/omniroute)
|
||||
[](https://hub.docker.com/r/diegosouzapw/omniroute)
|
||||
[](https://github.com/diegosouzapw/OmniRoute/blob/main/LICENSE)
|
||||
[](https://omniroute.online)
|
||||
[](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
|
||||
[🌐 Сайт](https://omniroute.online) • [🚀 Быстрый старт](#-быстрый-старт) • [💡 Функции](#-основные-функции) • [📖 Документация](#-документация) • [💰 Цены](#-обзор-цен)
|
||||
|
||||
🌐 **Доступно на:** [English](README.md) | [Português](README.pt-BR.md) | [Español](README.es.md) | [Русский](README.ru.md) | [中文](README.zh-CN.md) | [Deutsch](README.de.md) | [Français](README.fr.md) | [Italiano](README.it.md)
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## 🤔 Почему OmniRoute?
|
||||
|
||||
**Перестаньте тратить деньги и упираться в лимиты:**
|
||||
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> Квота подписки истекает неиспользованной каждый месяц
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> Лимиты скорости останавливают вас посреди программирования
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> Дорогие API ($20-50/месяц за провайдера)
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> Ручное переключение между провайдерами
|
||||
|
||||
**OmniRoute решает это:**
|
||||
|
||||
- ✅ **Максимизируйте подписки** — Отслеживайте квоты, используйте всё до сброса
|
||||
- ✅ **Автоматический fallback** — Подписка → API Key → Дешёвый → Бесплатный, нулевой простой
|
||||
- ✅ **Мульти-аккаунт** — Round-robin между аккаунтами каждого провайдера
|
||||
- ✅ **Универсальный** — Работает с Claude Code, Codex, Gemini CLI, Cursor, Cline, OpenClaw, любым CLI-инструментом
|
||||
|
||||
---
|
||||
|
||||
## 🔄 Как это работает
|
||||
|
||||
```
|
||||
┌─────────────┐
|
||||
│ Ваш CLI │ (Claude Code, Codex, Gemini CLI, OpenClaw, Cursor, Cline...)
|
||||
│ Tool │
|
||||
└──────┬──────┘
|
||||
│ http://localhost:20128/v1
|
||||
↓
|
||||
┌─────────────────────────────────────────┐
|
||||
│ OmniRoute (Умный маршрутизатор) │
|
||||
│ • Трансляция формата (OpenAI ↔ Claude) │
|
||||
│ • Отслеживание квот + Embeddings + Изображения │
|
||||
│ • Автообновление токенов │
|
||||
└──────┬──────────────────────────────────┘
|
||||
│
|
||||
├─→ [Tier 1: ПОДПИСКА] Claude Code, Codex, Gemini CLI
|
||||
│ ↓ квота исчерпана
|
||||
├─→ [Tier 2: API KEY] DeepSeek, Groq, xAI, Mistral, NVIDIA NIM и др.
|
||||
│ ↓ лимит бюджета
|
||||
├─→ [Tier 3: ДЕШЁВЫЙ] GLM ($0.6/1M), MiniMax ($0.2/1M)
|
||||
│ ↓ лимит бюджета
|
||||
└─→ [Tier 4: БЕСПЛАТНЫЙ] iFlow, Qwen, Kiro (неограниченно)
|
||||
|
||||
Результат: Никогда не прекращайте программировать, минимальные затраты
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Быстрый старт
|
||||
|
||||
**1. Установите глобально:**
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
omniroute
|
||||
```
|
||||
|
||||
🎉 Dashboard открывается на `http://localhost:20128`
|
||||
|
||||
| Команда | Описание |
|
||||
| ----------------------- | ------------------------------------------ |
|
||||
| `omniroute` | Запустить сервер (порт по умолчанию 20128) |
|
||||
| `omniroute --port 3000` | Использовать другой порт |
|
||||
| `omniroute --no-open` | Не открывать браузер автоматически |
|
||||
| `omniroute --help` | Показать справку |
|
||||
|
||||
**2. Подключите БЕСПЛАТНОГО провайдера:**
|
||||
|
||||
Dashboard → Провайдеры → Подключить **Claude Code** или **Antigravity** → OAuth вход → Готово!
|
||||
|
||||
**3. Используйте в CLI-инструменте:**
|
||||
|
||||
```
|
||||
Claude Code/Codex/Gemini CLI/OpenClaw/Cursor/Cline Настройки:
|
||||
Endpoint: http://localhost:20128/v1
|
||||
API Key: [скопируйте из dashboard]
|
||||
Model: if/kimi-k2-thinking
|
||||
```
|
||||
|
||||
**Готово!** Начните программировать с БЕСПЛАТНЫМИ AI-моделями.
|
||||
|
||||
**Альтернатива — запуск из исходного кода:**
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
npm install
|
||||
PORT=20128 NEXT_PUBLIC_BASE_URL=http://localhost:20128 npm run dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🐳 Docker
|
||||
|
||||
OmniRoute доступен как публичный Docker-образ на [Docker Hub](https://hub.docker.com/r/diegosouzapw/omniroute).
|
||||
|
||||
**Быстрый запуск:**
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
**С файлом окружения:**
|
||||
|
||||
```bash
|
||||
# Скопируйте и отредактируйте .env
|
||||
cp .env.example .env
|
||||
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
--env-file .env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
**Используя Docker Compose:**
|
||||
|
||||
```bash
|
||||
# Базовый профиль (без CLI-инструментов)
|
||||
docker compose --profile base up -d
|
||||
|
||||
# CLI-профиль (Claude Code, Codex, OpenClaw встроены)
|
||||
docker compose --profile cli up -d
|
||||
```
|
||||
|
||||
| Образ | Тег | Размер | Описание |
|
||||
| ------------------------ | -------- | ------ | -------------------------- |
|
||||
| `diegosouzapw/omniroute` | `latest` | ~250MB | Последний стабильный релиз |
|
||||
| `diegosouzapw/omniroute` | `1.0.6` | ~250MB | Текущая версия |
|
||||
|
||||
---
|
||||
|
||||
## 💰 Обзор цен
|
||||
|
||||
| Tier | Провайдер | Стоимость | Сброс квоты | Лучше всего для |
|
||||
| ----------------- | ----------------- | ----------------------------- | ------------------ | -------------------------------- |
|
||||
| **💳 ПОДПИСКА** | Claude Code (Pro) | $20/мес | 5ч + еженедельно | Уже подписан |
|
||||
| | Codex (Plus/Pro) | $20-200/мес | 5ч + еженедельно | Пользователи OpenAI |
|
||||
| | Gemini CLI | **БЕСПЛАТНО** | 180K/мес + 1K/день | Все! |
|
||||
| | GitHub Copilot | $10-19/мес | Ежемесячно | Пользователи GitHub |
|
||||
| **🔑 API KEY** | NVIDIA NIM | **БЕСПЛАТНО** (1000 кредитов) | Одноразово | Бесплатное тестирование |
|
||||
| | DeepSeek | По использованию | Нет | Лучшее соотношение цена/качество |
|
||||
| | Groq | Беспл. уровень + платный | Ограничено | Сверхбыстрый вывод |
|
||||
| | xAI (Grok) | По использованию | Нет | Модели Grok |
|
||||
| | Mistral | Беспл. уровень + платный | Ограничено | Европейский AI |
|
||||
| | OpenRouter | По использованию | Нет | 100+ моделей |
|
||||
| **💰 ДЕШЁВЫЙ** | GLM-4.7 | $0.6/1M | Ежедневно 10ч | Бюджетный бэкап |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5ч ротация | Самый дешёвый вариант |
|
||||
| | Kimi K2 | $9/мес фикс | 10M токенов/мес | Предсказуемая цена |
|
||||
| **🆓 БЕСПЛАТНЫЙ** | iFlow | $0 | Неограниченно | 8 бесплатных моделей |
|
||||
| | Qwen | $0 | Неограниченно | 3 бесплатные модели |
|
||||
| | Kiro | $0 | Неограниченно | Claude бесплатно |
|
||||
|
||||
**💡 Совет:** Начните с Gemini CLI (180K бесплатно/мес) + iFlow (неограниченно бесплатно) = $0!
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Сценарии использования
|
||||
|
||||
### Сценарий 1: «У меня подписка Claude Pro»
|
||||
|
||||
**Проблема:** Квота истекает неиспользованной, лимиты скорости во время интенсивного программирования
|
||||
|
||||
```
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (используйте подписку полностью)
|
||||
2. glm/glm-4.7 (дешёвый бэкап при исчерпании квоты)
|
||||
3. if/kimi-k2-thinking (бесплатный аварийный fallback)
|
||||
|
||||
Месячная стоимость: $20 (подписка) + ~$5 (бэкап) = $25 итого
|
||||
vs. $20 + упирание в лимиты = разочарование
|
||||
```
|
||||
|
||||
### Сценарий 2: «Хочу нулевую стоимость»
|
||||
|
||||
**Проблема:** Не может позволить подписки, нужен надёжный AI для программирования
|
||||
|
||||
```
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash (180K бесплатно/мес)
|
||||
2. if/kimi-k2-thinking (неограниченно бесплатно)
|
||||
3. qw/qwen3-coder-plus (неограниченно бесплатно)
|
||||
|
||||
Месячная стоимость: $0
|
||||
Качество: Модели готовые к продакшену
|
||||
```
|
||||
|
||||
### Сценарий 3: «Мне нужно программировать 24/7, без перерывов»
|
||||
|
||||
**Проблема:** Дедлайны, не может позволить простой
|
||||
|
||||
```
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6 (лучшее качество)
|
||||
2. cx/gpt-5.2-codex (вторая подписка)
|
||||
3. glm/glm-4.7 (дешёвый, ежедневный сброс)
|
||||
4. minimax/MiniMax-M2.1 (самый дешёвый, сброс 5ч)
|
||||
5. if/kimi-k2-thinking (бесплатно неограниченно)
|
||||
|
||||
Результат: 5 уровней fallback = нулевой простой
|
||||
```
|
||||
|
||||
### Сценарий 4: «Хочу БЕСПЛАТНЫЙ AI в OpenClaw»
|
||||
|
||||
**Проблема:** Нужен AI-ассистент в мессенджерах, полностью бесплатно
|
||||
|
||||
```
|
||||
Combo: "openclaw-free"
|
||||
1. if/glm-4.7 (неограниченно бесплатно)
|
||||
2. if/minimax-m2.1 (неограниченно бесплатно)
|
||||
3. if/kimi-k2-thinking (неограниченно бесплатно)
|
||||
|
||||
Месячная стоимость: $0
|
||||
Доступ через: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 💡 Основные функции
|
||||
|
||||
### 🧠 Маршрутизация и интеллект
|
||||
|
||||
| Функция | Что делает |
|
||||
| ------------------------------------------- | ----------------------------------------------------------------------------- |
|
||||
| 🎯 **Умный 4-уровневый Fallback** | Авто-маршрутизация: Подписка → API Key → Дешёвый → Бесплатный |
|
||||
| 📊 **Отслеживание квот в реальном времени** | Счётчик токенов в реальном времени + обратный отсчёт до сброса |
|
||||
| 🔄 **Трансляция формата** | OpenAI ↔ Claude ↔ Gemini ↔ Cursor ↔ Kiro бесшовно |
|
||||
| 👥 **Мульти-аккаунт** | Несколько аккаунтов на провайдера с интеллектуальным выбором |
|
||||
| 🔄 **Автообновление токенов** | OAuth-токены обновляются автоматически с повторами |
|
||||
| 🎨 **Пользовательские комбо** | 6 стратегий: fill-first, round-robin, p2c, random, least-used, cost-optimized |
|
||||
| 🧩 **Пользовательские модели** | Добавьте любой ID модели к любому провайдеру |
|
||||
| 🌐 **Wildcard-маршрутизатор** | Маршрутизируйте паттерны `provider/*` к любому провайдеру динамически |
|
||||
| 🧠 **Бюджет рассуждений** | Режимы passthrough, auto, custom и adaptive для моделей рассуждений |
|
||||
| 💬 **Инъекция System Prompt** | Глобальный system prompt для всех запросов |
|
||||
| 📄 **API Responses** | Полная поддержка OpenAI Responses API (`/v1/responses`) для Codex |
|
||||
|
||||
### 🎵 Мультимодальные API
|
||||
|
||||
| Функция | Что делает |
|
||||
| ---------------------------- | --------------------------------------------------- |
|
||||
| 🖼️ **Генерация изображений** | `/v1/images/generations` — 4 провайдера, 9+ моделей |
|
||||
| 📐 **Embeddings** | `/v1/embeddings` — 6 провайдеров, 9+ моделей |
|
||||
| 🎤 **Транскрипция аудио** | `/v1/audio/transcriptions` — Совместимо с Whisper |
|
||||
| 🔊 **Текст в речь** | `/v1/audio/speech` — Мульти-провайдерный синтез |
|
||||
| 🛡️ **Модерация** | `/v1/moderations` — Проверки безопасности контента |
|
||||
| 🔀 **Reranking** | `/v1/rerank` — Переранжирование релевантности |
|
||||
|
||||
### 🛡️ Устойчивость и безопасность
|
||||
|
||||
| Функция | Что делает |
|
||||
| -------------------------------- | -------------------------------------------------------------- |
|
||||
| 🔌 **Circuit Breaker** | Авто-открытие/закрытие по провайдеру с настраиваемыми порогами |
|
||||
| 🛡️ **Anti-Thundering Herd** | Mutex + семафор для API key провайдеров |
|
||||
| 🧠 **Семантический кеш** | Двухуровневый кеш (сигнатура + семантика) снижает стоимость |
|
||||
| ⚡ **Идемпотентность запросов** | 5с окно дедупликации для дублирующихся запросов |
|
||||
| 🔒 **Спуфинг TLS Fingerprint** | Обход обнаружения ботов через wreq-js |
|
||||
| 🌐 **Фильтрация IP** | Allowlist/blocklist для контроля доступа к API |
|
||||
| 📊 **Настраиваемые Rate Limits** | Настраиваемые RPM, минимальный интервал, макс. конкуррентность |
|
||||
|
||||
### 📊 Наблюдаемость и аналитика
|
||||
|
||||
| Функция | Что делает |
|
||||
| ----------------------------- | ------------------------------------------------------------------------ |
|
||||
| 📝 **Логи запросов** | Режим debug с полными логами запросов/ответов |
|
||||
| 💾 **Логи SQLite** | Постоянные proxy-логи переживают перезапуски |
|
||||
| 📊 **Dashboard аналитики** | Recharts: карточки статистики, график использования, таблица провайдеров |
|
||||
| 📈 **Отслеживание прогресса** | Opt-in SSE-события прогресса для стриминга |
|
||||
| 🧪 **Оценки LLM** | Тестирование с golden set и 4 стратегиями сравнения |
|
||||
| 🔍 **Телеметрия запросов** | Агрегация латентности p50/p95/p99 + трекинг X-Request-Id |
|
||||
| 📋 **Логи + Квоты** | Отдельные страницы для просмотра логов и отслеживания квот |
|
||||
| 🏥 **Dashboard здоровья** | Uptime, состояния circuit breaker, блокировки, статистика кеша |
|
||||
| 💰 **Отслеживание стоимости** | Управление бюджетом + настройка цен по моделям |
|
||||
|
||||
### ☁️ Деплой и синхронизация
|
||||
|
||||
| Функция | Что делает |
|
||||
| -------------------------- | --------------------------------------------------------------------------- |
|
||||
| 💾 **Cloud Sync** | Синхронизация настроек между устройствами через Cloudflare Workers |
|
||||
| 🌐 **Деплой куда угодно** | Localhost, VPS, Docker, Cloudflare Workers |
|
||||
| 🔑 **Управление API Keys** | Генерация, ротация и настройка scope API keys по провайдерам |
|
||||
| 🧙 **Мастер настройки** | 4-шаговая настройка для новых пользователей |
|
||||
| 🔧 **Dashboard CLI Tools** | Настройка в один клик для Claude, Codex, Cline, OpenClaw, Kilo, Antigravity |
|
||||
| 🔄 **Бэкапы БД** | Автоматическое резервное копирование и восстановление всех настроек |
|
||||
|
||||
<details>
|
||||
<summary><b>📖 Подробности функций</b></summary>
|
||||
|
||||
### 🎯 Умный 4-уровневый Fallback
|
||||
|
||||
Создавайте комбо с автоматическим fallback:
|
||||
|
||||
```
|
||||
Combo: "my-coding-stack"
|
||||
1. cc/claude-opus-4-6 (ваша подписка)
|
||||
2. nvidia/llama-3.3-70b (бесплатный NVIDIA API)
|
||||
3. glm/glm-4.7 (дешёвый бэкап, $0.6/1M)
|
||||
4. if/kimi-k2-thinking (бесплатный fallback)
|
||||
|
||||
→ Автоматически переключается при исчерпании квоты или ошибках
|
||||
```
|
||||
|
||||
### 📊 Отслеживание квот в реальном времени
|
||||
|
||||
- Потребление токенов по провайдерам
|
||||
- Обратный отсчёт до сброса (5 часов, ежедневно, еженедельно)
|
||||
- Оценка стоимости для платных уровней
|
||||
- Ежемесячные отчёты о расходах
|
||||
|
||||
### 🔄 Трансляция формата
|
||||
|
||||
Бесшовная трансляция между форматами:
|
||||
|
||||
- **OpenAI** ↔ **Claude** ↔ **Gemini** ↔ **OpenAI Responses**
|
||||
- Ваш CLI отправляет формат OpenAI → OmniRoute транслирует → Провайдер получает нативный формат
|
||||
- Работает с любым инструментом, поддерживающим пользовательские OpenAI endpoints
|
||||
|
||||
### 👥 Мульти-аккаунт
|
||||
|
||||
- Добавляйте несколько аккаунтов на провайдера
|
||||
- Автоматический round-robin или маршрутизация по приоритету
|
||||
- Fallback на следующий аккаунт при исчерпании квоты
|
||||
|
||||
### 🔄 Автообновление токенов
|
||||
|
||||
- OAuth-токены обновляются автоматически до истечения
|
||||
- Без необходимости ручной повторной аутентификации
|
||||
- Бесшовный опыт по всем провайдерам
|
||||
|
||||
### 🎨 Пользовательские комбо
|
||||
|
||||
- Создавайте неограниченные комбинации моделей
|
||||
- 6 стратегий: fill-first, round-robin, power-of-two-choices, random, least-used, cost-optimized
|
||||
- Делитесь комбо между устройствами с Cloud Sync
|
||||
|
||||
### 🏥 Dashboard здоровья
|
||||
|
||||
- Статус системы (uptime, версия, использование памяти)
|
||||
- Состояния circuit breaker по провайдерам (Closed/Open/Half-Open)
|
||||
- Статус rate limit и активные блокировки
|
||||
- Статистика кеша сигнатур
|
||||
- Телеметрия латентности (p50/p95/p99) + кеш промптов
|
||||
- Сброс состояния здоровья одним кликом
|
||||
|
||||
### 🔧 Playground транслятора
|
||||
|
||||
- Отладка, тестирование и визуализация трансляции форматов API
|
||||
- Отправляйте запросы и смотрите, как OmniRoute транслирует между форматами провайдеров
|
||||
- Бесценно для устранения проблем интеграции
|
||||
|
||||
### 💾 Cloud Sync
|
||||
|
||||
- Синхронизация провайдеров, комбо и настроек между устройствами
|
||||
- Автоматическая фоновая синхронизация
|
||||
- Безопасное шифрованное хранилище
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 📖 Руководство по настройке
|
||||
|
||||
<details>
|
||||
<summary><b>💳 Провайдеры по подписке</b></summary>
|
||||
|
||||
### Claude Code (Pro/Max)
|
||||
|
||||
```bash
|
||||
Dashboard → Провайдеры → Подключить Claude Code
|
||||
→ OAuth вход → Автообновление токенов
|
||||
→ Отслеживание квоты 5ч + еженедельно
|
||||
|
||||
Модели:
|
||||
cc/claude-opus-4-6
|
||||
cc/claude-sonnet-4-5-20250929
|
||||
cc/claude-haiku-4-5-20251001
|
||||
```
|
||||
|
||||
**Совет:** Используйте Opus для сложных задач, Sonnet для скорости. OmniRoute отслеживает квоту по моделям!
|
||||
|
||||
### OpenAI Codex (Plus/Pro)
|
||||
|
||||
```bash
|
||||
Dashboard → Провайдеры → Подключить Codex
|
||||
→ OAuth вход (порт 1455)
|
||||
→ Сброс 5ч + еженедельно
|
||||
|
||||
Модели:
|
||||
cx/gpt-5.2-codex
|
||||
cx/gpt-5.1-codex-max
|
||||
```
|
||||
|
||||
### Gemini CLI (БЕСПЛАТНО 180K/мес!)
|
||||
|
||||
```bash
|
||||
Dashboard → Провайдеры → Подключить Gemini CLI
|
||||
→ Google OAuth
|
||||
→ 180K completions/мес + 1K/день
|
||||
|
||||
Модели:
|
||||
gc/gemini-3-flash-preview
|
||||
gc/gemini-2.5-pro
|
||||
```
|
||||
|
||||
**Лучшая ценность:** Огромный бесплатный уровень! Используйте перед платными.
|
||||
|
||||
### GitHub Copilot
|
||||
|
||||
```bash
|
||||
Dashboard → Провайдеры → Подключить GitHub
|
||||
→ OAuth через GitHub
|
||||
→ Ежемесячный сброс (1-е число)
|
||||
|
||||
Модели:
|
||||
gh/gpt-5
|
||||
gh/claude-4.5-sonnet
|
||||
gh/gemini-3-pro
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔑 Провайдеры по API Key</b></summary>
|
||||
|
||||
### NVIDIA NIM (БЕСПЛАТНО 1000 кредитов!)
|
||||
|
||||
1. Регистрация: [build.nvidia.com](https://build.nvidia.com)
|
||||
2. Получите бесплатный API key (1000 кредитов включены)
|
||||
3. Dashboard → Добавить провайдера → NVIDIA NIM:
|
||||
- API Key: `nvapi-your-key`
|
||||
|
||||
**Модели:** `nvidia/llama-3.3-70b-instruct`, `nvidia/mistral-7b-instruct` и 50+ других
|
||||
|
||||
**Совет:** OpenAI-совместимый API — работает идеально с трансляцией форматов OmniRoute!
|
||||
|
||||
### DeepSeek
|
||||
|
||||
1. Регистрация: [platform.deepseek.com](https://platform.deepseek.com)
|
||||
2. Получите API key
|
||||
3. Dashboard → Добавить провайдера → DeepSeek
|
||||
|
||||
**Модели:** `deepseek/deepseek-chat`, `deepseek/deepseek-coder`
|
||||
|
||||
### Groq (Бесплатный уровень доступен!)
|
||||
|
||||
1. Регистрация: [console.groq.com](https://console.groq.com)
|
||||
2. Получите API key (бесплатный уровень включён)
|
||||
3. Dashboard → Добавить провайдера → Groq
|
||||
|
||||
**Модели:** `groq/llama-3.3-70b`, `groq/mixtral-8x7b`
|
||||
|
||||
**Совет:** Сверхбыстрый вывод — лучший для программирования в реальном времени!
|
||||
|
||||
### OpenRouter (100+ моделей)
|
||||
|
||||
1. Регистрация: [openrouter.ai](https://openrouter.ai)
|
||||
2. Получите API key
|
||||
3. Dashboard → Добавить провайдера → OpenRouter
|
||||
|
||||
**Модели:** Доступ к 100+ моделям от всех основных провайдеров через один API key.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>💰 Дешёвые провайдеры (Бэкап)</b></summary>
|
||||
|
||||
### GLM-4.7 (Ежедневный сброс, $0.6/1M)
|
||||
|
||||
1. Регистрация: [Zhipu AI](https://open.bigmodel.cn/)
|
||||
2. Получите API key из Coding Plan
|
||||
3. Dashboard → Добавить API Key:
|
||||
- Провайдер: `glm`
|
||||
- API Key: `your-key`
|
||||
|
||||
**Используйте:** `glm/glm-4.7`
|
||||
|
||||
**Совет:** Coding Plan предлагает 3× квоту по цене 1/7! Ежедневный сброс в 10:00.
|
||||
|
||||
### MiniMax M2.1 (Сброс 5ч, $0.20/1M)
|
||||
|
||||
1. Регистрация: [MiniMax](https://www.minimax.io/)
|
||||
2. Получите API key
|
||||
3. Dashboard → Добавить API Key
|
||||
|
||||
**Используйте:** `minimax/MiniMax-M2.1`
|
||||
|
||||
**Совет:** Самый дешёвый вариант для длинного контекста (1M токенов)!
|
||||
|
||||
### Kimi K2 ($9/мес фикс)
|
||||
|
||||
1. Подпишитесь: [Moonshot AI](https://platform.moonshot.ai/)
|
||||
2. Получите API key
|
||||
3. Dashboard → Добавить API Key
|
||||
|
||||
**Используйте:** `kimi/kimi-latest`
|
||||
|
||||
**Совет:** Фикс $9/мес за 10M токенов = $0.90/1M эффективная стоимость!
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🆓 БЕСПЛАТНЫЕ провайдеры (Аварийный бэкап)</b></summary>
|
||||
|
||||
### iFlow (8 БЕСПЛАТНЫХ моделей)
|
||||
|
||||
```bash
|
||||
Dashboard → Подключить iFlow
|
||||
→ OAuth вход iFlow
|
||||
→ Неограниченное использование
|
||||
|
||||
Модели:
|
||||
if/kimi-k2-thinking
|
||||
if/qwen3-coder-plus
|
||||
if/glm-4.7
|
||||
if/minimax-m2
|
||||
if/deepseek-r1
|
||||
```
|
||||
|
||||
### Qwen (3 БЕСПЛАТНЫЕ модели)
|
||||
|
||||
```bash
|
||||
Dashboard → Подключить Qwen
|
||||
→ Авторизация по коду устройства
|
||||
→ Неограниченное использование
|
||||
|
||||
Модели:
|
||||
qw/qwen3-coder-plus
|
||||
qw/qwen3-coder-flash
|
||||
```
|
||||
|
||||
### Kiro (Claude БЕСПЛАТНО)
|
||||
|
||||
```bash
|
||||
Dashboard → Подключить Kiro
|
||||
→ AWS Builder ID или Google/GitHub
|
||||
→ Неограниченное использование
|
||||
|
||||
Модели:
|
||||
kr/claude-sonnet-4.5
|
||||
kr/claude-haiku-4.5
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🎨 Создание комбо</b></summary>
|
||||
|
||||
### Пример 1: Максимизация подписки → Дешёвый бэкап
|
||||
|
||||
```
|
||||
Dashboard → Комбо → Создать новое
|
||||
|
||||
Название: premium-coding
|
||||
Модели:
|
||||
1. cc/claude-opus-4-6 (Основная подписка)
|
||||
2. glm/glm-4.7 (Дешёвый бэкап, $0.6/1M)
|
||||
3. minimax/MiniMax-M2.1 (Самый дешёвый fallback, $0.20/1M)
|
||||
|
||||
Используйте в CLI: premium-coding
|
||||
```
|
||||
|
||||
### Пример 2: Только бесплатные (Нулевая стоимость)
|
||||
|
||||
```
|
||||
Название: free-combo
|
||||
Модели:
|
||||
1. gc/gemini-3-flash-preview (180K бесплатно/мес)
|
||||
2. if/kimi-k2-thinking (неограниченно)
|
||||
3. qw/qwen3-coder-plus (неограниченно)
|
||||
|
||||
Стоимость: $0 навсегда!
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔧 Интеграция с CLI</b></summary>
|
||||
|
||||
### Cursor IDE
|
||||
|
||||
```
|
||||
Настройки → Модели → Расширенные:
|
||||
OpenAI API Base URL: http://localhost:20128/v1
|
||||
OpenAI API Key: [из dashboard OmniRoute]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
Используйте страницу **CLI Tools** в dashboard для настройки в один клик, или редактируйте `~/.claude/settings.json` вручную.
|
||||
|
||||
### Codex CLI
|
||||
|
||||
```bash
|
||||
export OPENAI_BASE_URL="http://localhost:20128"
|
||||
export OPENAI_API_KEY="your-omniroute-api-key"
|
||||
|
||||
codex "your prompt"
|
||||
```
|
||||
|
||||
### OpenClaw
|
||||
|
||||
**Вариант 1 — Dashboard (рекомендуется):**
|
||||
|
||||
```
|
||||
Dashboard → CLI Tools → OpenClaw → Выбрать модель → Применить
|
||||
```
|
||||
|
||||
**Вариант 2 — Вручную:** Редактируйте `~/.openclaw/openclaw.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"models": {
|
||||
"providers": {
|
||||
"omniroute": {
|
||||
"baseUrl": "http://127.0.0.1:20128/v1",
|
||||
"apiKey": "sk_omniroute",
|
||||
"api": "openai-completions"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> **Примечание:** OpenClaw работает только с локальным OmniRoute. Используйте `127.0.0.1` вместо `localhost` для избежания проблем с IPv6.
|
||||
|
||||
### Cline / Continue / RooCode
|
||||
|
||||
```
|
||||
Настройки → Конфигурация API:
|
||||
Провайдер: OpenAI Compatible
|
||||
Base URL: http://localhost:20128/v1
|
||||
API Key: [из dashboard OmniRoute]
|
||||
Model: if/kimi-k2-thinking
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 📊 Доступные модели
|
||||
|
||||
<details>
|
||||
<summary><b>Посмотреть все доступные модели</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** - Pro/Max:
|
||||
|
||||
- `cc/claude-opus-4-6`
|
||||
- `cc/claude-sonnet-4-5-20250929`
|
||||
- `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** - Plus/Pro:
|
||||
|
||||
- `cx/gpt-5.2-codex`
|
||||
- `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** - БЕСПЛАТНО:
|
||||
|
||||
- `gc/gemini-3-flash-preview`
|
||||
- `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)**:
|
||||
|
||||
- `gh/gpt-5`
|
||||
- `gh/claude-4.5-sonnet`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)** - БЕСПЛАТНЫЕ кредиты:
|
||||
|
||||
- `nvidia/llama-3.3-70b-instruct`
|
||||
- `nvidia/mistral-7b-instruct`
|
||||
- 50+ моделей на [build.nvidia.com](https://build.nvidia.com)
|
||||
|
||||
**GLM (`glm/`)** - $0.6/1M:
|
||||
|
||||
- `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** - $0.2/1M:
|
||||
|
||||
- `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** - БЕСПЛАТНО:
|
||||
|
||||
- `if/kimi-k2-thinking`
|
||||
- `if/qwen3-coder-plus`
|
||||
- `if/deepseek-r1`
|
||||
- `if/glm-4.7`
|
||||
- `if/minimax-m2`
|
||||
|
||||
**Qwen (`qw/`)** - БЕСПЛАТНО:
|
||||
|
||||
- `qw/qwen3-coder-plus`
|
||||
- `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** - БЕСПЛАТНО:
|
||||
|
||||
- `kr/claude-sonnet-4.5`
|
||||
- `kr/claude-haiku-4.5`
|
||||
|
||||
**OpenRouter (`or/`)** - 100+ моделей:
|
||||
|
||||
- `or/anthropic/claude-4-sonnet`
|
||||
- `or/google/gemini-2.5-pro`
|
||||
- Любая модель с [openrouter.ai/models](https://openrouter.ai/models)
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Оценки (Evals)
|
||||
|
||||
OmniRoute включает встроенный фреймворк оценки для тестирования качества ответов LLM по golden set. Доступ через **Analytics → Evals** в dashboard.
|
||||
|
||||
### Встроенный Golden Set
|
||||
|
||||
Предзагруженный «OmniRoute Golden Set» содержит 10 тестов:
|
||||
|
||||
- Приветствия, математика, география, генерация кода
|
||||
- Соответствие формату JSON, перевод, markdown
|
||||
- Отказ от небезопасного контента, подсчёт, булева логика
|
||||
|
||||
### Стратегии оценки
|
||||
|
||||
| Стратегия | Описание | Пример |
|
||||
| ---------- | ----------------------------------------------------- | -------------------------------- |
|
||||
| `exact` | Вывод должен совпадать точно | `"4"` |
|
||||
| `contains` | Вывод должен содержать подстроку (без учёта регистра) | `"Paris"` |
|
||||
| `regex` | Вывод должен соответствовать regex-паттерну | `"1.*2.*3"` |
|
||||
| `custom` | Пользовательская JS-функция возвращает true/false | `(output) => output.length > 10` |
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Устранение неполадок
|
||||
|
||||
<details>
|
||||
<summary><b>Нажмите для раскрытия руководства</b></summary>
|
||||
|
||||
**«Language model did not provide messages»**
|
||||
|
||||
- Квота провайдера исчерпана → Проверьте трекер квот в dashboard
|
||||
- Решение: Используйте комбо с fallback или переключитесь на более дешёвый уровень
|
||||
|
||||
**Rate limiting**
|
||||
|
||||
- Квота подписки исчерпана → Fallback на GLM/MiniMax
|
||||
- Добавьте комбо: `cc/claude-opus-4-6 → glm/glm-4.7 → if/kimi-k2-thinking`
|
||||
|
||||
**OAuth-токен истёк**
|
||||
|
||||
- Обновляется автоматически OmniRoute
|
||||
- Если проблема сохраняется: Dashboard → Провайдер → Переподключить
|
||||
|
||||
**Высокие расходы**
|
||||
|
||||
- Проверьте статистику в Dashboard → Расходы
|
||||
- Переключите основную модель на GLM/MiniMax
|
||||
- Используйте бесплатный уровень (Gemini CLI, iFlow) для некритичных задач
|
||||
|
||||
**Dashboard открывается на неправильном порту**
|
||||
|
||||
- Установите `PORT=20128` и `NEXT_PUBLIC_BASE_URL=http://localhost:20128`
|
||||
|
||||
**Ошибки cloud sync**
|
||||
|
||||
- Проверьте что `BASE_URL` указывает на ваш запущенный экземпляр
|
||||
- Проверьте что `CLOUD_URL` указывает на правильный облачный endpoint
|
||||
- Держите значения `NEXT_PUBLIC_*` синхронизированными с серверными значениями
|
||||
|
||||
**Первый вход не работает**
|
||||
|
||||
- Проверьте `INITIAL_PASSWORD` в `.env`
|
||||
- Если не задан, пароль по умолчанию `123456`
|
||||
|
||||
**Нет логов запросов**
|
||||
|
||||
- Установите `ENABLE_REQUEST_LOGS=true` в `.env`
|
||||
|
||||
**Тест подключения показывает «Invalid» для OpenAI-совместимых провайдеров**
|
||||
|
||||
- Многие провайдеры не предоставляют endpoint `/models`
|
||||
- OmniRoute v1.0.6+ включает fallback-валидацию через chat completions
|
||||
- Убедитесь что base URL содержит суффикс `/v1`
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Технологический стек
|
||||
|
||||
- **Runtime**: Node.js 20+
|
||||
- **Язык**: TypeScript 5.9 — **100% TypeScript** в `src/` и `open-sse/` (v1.0.6)
|
||||
- **Framework**: Next.js 16 + React 19 + Tailwind CSS 4
|
||||
- **База данных**: LowDB (JSON) + SQLite (состояние домена + proxy-логи)
|
||||
- **Стриминг**: Server-Sent Events (SSE)
|
||||
- **Аутентификация**: OAuth 2.0 (PKCE) + JWT + API Keys
|
||||
- **Тестирование**: Node.js test runner (368+ юнит-тестов)
|
||||
- **CI/CD**: GitHub Actions (авто-публикация npm + Docker Hub при релизе)
|
||||
- **Сайт**: [omniroute.online](https://omniroute.online)
|
||||
- **Пакет**: [npmjs.com/package/omniroute](https://www.npmjs.com/package/omniroute)
|
||||
- **Docker**: [hub.docker.com/r/diegosouzapw/omniroute](https://hub.docker.com/r/diegosouzapw/omniroute)
|
||||
- **Устойчивость**: Circuit breaker, экспоненциальный backoff, anti-thundering herd, TLS-спуфинг
|
||||
|
||||
---
|
||||
|
||||
## 📖 Документация
|
||||
|
||||
| Документ | Описание |
|
||||
| ----------------------------------------------- | ------------------------------------------------ |
|
||||
| [Руководство пользователя](docs/USER_GUIDE.md) | Провайдеры, комбо, интеграция CLI, деплой |
|
||||
| [Справка API](docs/API_REFERENCE.md) | Все endpoints с примерами |
|
||||
| [Устранение неполадок](docs/TROUBLESHOOTING.md) | Частые проблемы и решения |
|
||||
| [Архитектура](docs/ARCHITECTURE.md) | Архитектура системы и внутреннее устройство |
|
||||
| [Как внести вклад](CONTRIBUTING.md) | Настройка разработки и руководящие принципы |
|
||||
| [Спецификация OpenAPI](docs/openapi.yaml) | Спецификация OpenAPI 3.0 |
|
||||
| [Политика безопасности](SECURITY.md) | Сообщение об уязвимостях и практики безопасности |
|
||||
|
||||
---
|
||||
|
||||
## 📧 Поддержка
|
||||
|
||||
> 💬 **Присоединяйтесь к сообществу!** [Группа WhatsApp](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t) — Получайте помощь, делитесь советами и оставайтесь в курсе.
|
||||
|
||||
- **Сайт**: [omniroute.online](https://omniroute.online)
|
||||
- **GitHub**: [github.com/diegosouzapw/OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
||||
- **Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **WhatsApp**: [Группа сообщества](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
- **Оригинальный проект**: [9router от decolua](https://github.com/decolua/9router)
|
||||
|
||||
---
|
||||
|
||||
## 👥 Участники
|
||||
|
||||
[](https://github.com/diegosouzapw/OmniRoute/graphs/contributors)
|
||||
|
||||
### Как внести вклад
|
||||
|
||||
1. Сделайте fork репозитория
|
||||
2. Создайте ветку функции (`git checkout -b feature/amazing-feature`)
|
||||
3. Зафиксируйте изменения (`git commit -m 'Add amazing feature'`)
|
||||
4. Отправьте в ветку (`git push origin feature/amazing-feature`)
|
||||
5. Откройте Pull Request
|
||||
|
||||
См. [CONTRIBUTING.md](CONTRIBUTING.md) для подробных рекомендаций.
|
||||
|
||||
### Выпуск новой версии
|
||||
|
||||
```bash
|
||||
# Создайте релиз — публикация в npm происходит автоматически
|
||||
gh release create v1.0.6 --title "v1.0.6" --generate-notes
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 История звёзд
|
||||
|
||||
<a href="https://star-history.com/#diegosouzapw/OmniRoute&Date">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
---
|
||||
|
||||
## 🙏 Благодарности
|
||||
|
||||
Особая благодарность **[9router](https://github.com/decolua/9router)** от **[decolua](https://github.com/decolua)** — оригинальному проекту, вдохновившему этот форк. OmniRoute строится на этом невероятном фундаменте с дополнительными функциями, мультимодальными API и полной переписью на TypeScript.
|
||||
|
||||
Особая благодарность **[CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI)** — оригинальной реализации на Go, вдохновившей этот порт на JavaScript.
|
||||
|
||||
---
|
||||
|
||||
## 📄 Лицензия
|
||||
|
||||
Лицензия MIT — см. [LICENSE](LICENSE) для подробностей.
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<sub>Сделано с ❤️ для разработчиков, которые программируют 24/7</sub>
|
||||
<br/>
|
||||
<sub><a href="https://omniroute.online">omniroute.online</a></sub>
|
||||
</div>
|
||||
-999
@@ -1,999 +0,0 @@
|
||||
<div align="center">
|
||||
<img src="./docs/screenshots/MainOmniRoute.png" alt="OmniRoute Dashboard" width="800"/>
|
||||
|
||||
# 🚀 OmniRoute — 免费 AI 网关
|
||||
|
||||
### 永不停止编程。智能路由至**免费和低成本 AI 模型**,自动故障转移。
|
||||
|
||||
_您的通用 API 代理 — 一个端点,36+ 提供商,零停机时间。_
|
||||
|
||||
**Chat Completions • Embeddings • 图像生成 • 音频 • Reranking • 100% TypeScript**
|
||||
|
||||
---
|
||||
|
||||
### 🤖 为您最爱的编程代理提供免费 AI
|
||||
|
||||
_通过 OmniRoute 连接任何 AI 驱动的 IDE 或 CLI 工具 — 免费 API 网关,无限编程。_
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/cline/cline">
|
||||
<img src="./public/providers/openclaw.png" alt="OpenClaw" width="48"/><br/>
|
||||
<b>OpenClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 205K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/HKUDS/nanobot">
|
||||
<img src="./public/providers/nanobot.png" alt="NanoBot" width="48"/><br/>
|
||||
<b>NanoBot</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 20.9K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/sipeed/picoclaw">
|
||||
<img src="./public/providers/picoclaw.jpg" alt="PicoClaw" width="48"/><br/>
|
||||
<b>PicoClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 14.6K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/zeroclaw-labs/zeroclaw">
|
||||
<img src="./public/providers/zeroclaw.png" alt="ZeroClaw" width="48"/><br/>
|
||||
<b>ZeroClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 9.9K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/nearai/ironclaw">
|
||||
<img src="./public/providers/ironclaw.png" alt="IronClaw" width="48"/><br/>
|
||||
<b>IronClaw</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 2.1K</sub>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/anomalyco/opencode">
|
||||
<img src="./public/providers/opencode.svg" alt="OpenCode" width="48"/><br/>
|
||||
<b>OpenCode</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 106K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/openai/codex">
|
||||
<img src="./public/providers/codex.png" alt="Codex CLI" width="48"/><br/>
|
||||
<b>Codex CLI</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 60.8K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/anthropics/claude-code">
|
||||
<img src="./public/providers/claude.png" alt="Claude Code" width="48"/><br/>
|
||||
<b>Claude Code</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 67.3K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/google-gemini/gemini-cli">
|
||||
<img src="./public/providers/gemini-cli.png" alt="Gemini CLI" width="48"/><br/>
|
||||
<b>Gemini CLI</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 94.7K</sub>
|
||||
</td>
|
||||
<td align="center" width="110">
|
||||
<a href="https://github.com/Kilo-Org/kilocode">
|
||||
<img src="./public/providers/kilocode.png" alt="Kilo Code" width="48"/><br/>
|
||||
<b>Kilo Code</b>
|
||||
</a><br/>
|
||||
<sub>⭐ 15.5K</sub>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<sub>📡 所有代理通过 <code>http://localhost:20128/v1</code> 或 <code>http://cloud.omniroute.online/v1</code> 连接 — 一个配置,无限模型和配额</sub>
|
||||
|
||||
---
|
||||
|
||||
[](https://www.npmjs.com/package/omniroute)
|
||||
[](https://hub.docker.com/r/diegosouzapw/omniroute)
|
||||
[](https://github.com/diegosouzapw/OmniRoute/blob/main/LICENSE)
|
||||
[](https://omniroute.online)
|
||||
[](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
|
||||
[🌐 网站](https://omniroute.online) • [🚀 快速开始](#-快速开始) • [💡 功能特性](#-核心功能) • [📖 文档](#-文档) • [💰 定价](#-定价概览)
|
||||
|
||||
🌐 **多语言版本:** [English](README.md) | [Português](README.pt-BR.md) | [Español](README.es.md) | [Русский](README.ru.md) | [中文](README.zh-CN.md) | [Deutsch](README.de.md) | [Français](README.fr.md) | [Italiano](README.it.md)
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## 🤔 为什么选择 OmniRoute?
|
||||
|
||||
**停止浪费金钱和遭遇限制:**
|
||||
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> 订阅配额每月未使用就过期
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> 速率限制在编程中途停止你
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> 昂贵的 API(每个提供商 $20-50/月)
|
||||
- <img src="https://img.shields.io/badge/✗-e74c3c?style=flat-square" height="16"/> 手动在提供商间切换
|
||||
|
||||
**OmniRoute 解决这些问题:**
|
||||
|
||||
- ✅ **最大化订阅** — 追踪配额,在重置前用完每一点
|
||||
- ✅ **自动故障转移** — 订阅 → API Key → 低价 → 免费,零停机
|
||||
- ✅ **多账号** — 每个提供商的账号轮询
|
||||
- ✅ **通用** — 适用于 Claude Code、Codex、Gemini CLI、Cursor、Cline、OpenClaw、任何 CLI 工具
|
||||
|
||||
---
|
||||
|
||||
## 🔄 工作原理
|
||||
|
||||
```
|
||||
┌─────────────┐
|
||||
│ 您的 CLI │ (Claude Code, Codex, Gemini CLI, OpenClaw, Cursor, Cline...)
|
||||
│ 工具 │
|
||||
└──────┬──────┘
|
||||
│ http://localhost:20128/v1
|
||||
↓
|
||||
┌─────────────────────────────────────────┐
|
||||
│ OmniRoute(智能路由器) │
|
||||
│ • 格式转换(OpenAI ↔ Claude) │
|
||||
│ • 配额追踪 + Embeddings + 图像 │
|
||||
│ • 自动令牌刷新 │
|
||||
└──────┬──────────────────────────────────┘
|
||||
│
|
||||
├─→ [第1层: 订阅] Claude Code, Codex, Gemini CLI
|
||||
│ ↓ 配额用完
|
||||
├─→ [第2层: API KEY] DeepSeek, Groq, xAI, Mistral, NVIDIA NIM 等
|
||||
│ ↓ 预算限制
|
||||
├─→ [第3层: 低价] GLM ($0.6/1M), MiniMax ($0.2/1M)
|
||||
│ ↓ 预算限制
|
||||
└─→ [第4层: 免费] iFlow, Qwen, Kiro(无限制)
|
||||
|
||||
结果:永不停止编程,成本最低
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚡ 快速开始
|
||||
|
||||
**1. 全局安装:**
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
omniroute
|
||||
```
|
||||
|
||||
🎉 仪表板在 `http://localhost:20128` 打开
|
||||
|
||||
| 命令 | 描述 |
|
||||
| ----------------------- | ---------------------------- |
|
||||
| `omniroute` | 启动服务器(默认端口 20128) |
|
||||
| `omniroute --port 3000` | 使用自定义端口 |
|
||||
| `omniroute --no-open` | 不自动打开浏览器 |
|
||||
| `omniroute --help` | 显示帮助 |
|
||||
|
||||
**2. 连接免费提供商:**
|
||||
|
||||
仪表板 → 提供商 → 连接 **Claude Code** 或 **Antigravity** → OAuth 登录 → 完成!
|
||||
|
||||
**3. 在 CLI 工具中使用:**
|
||||
|
||||
```
|
||||
Claude Code/Codex/Gemini CLI/OpenClaw/Cursor/Cline 设置:
|
||||
Endpoint: http://localhost:20128/v1
|
||||
API Key: [从仪表板复制]
|
||||
Model: if/kimi-k2-thinking
|
||||
```
|
||||
|
||||
**完成!** 开始使用免费 AI 模型编程。
|
||||
|
||||
**替代方案 — 从源代码运行:**
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
npm install
|
||||
PORT=20128 NEXT_PUBLIC_BASE_URL=http://localhost:20128 npm run dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🐳 Docker
|
||||
|
||||
OmniRoute 作为公共 Docker 镜像在 [Docker Hub](https://hub.docker.com/r/diegosouzapw/omniroute) 上可用。
|
||||
|
||||
**快速运行:**
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
**使用环境文件:**
|
||||
|
||||
```bash
|
||||
# 先复制并编辑 .env
|
||||
cp .env.example .env
|
||||
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
--env-file .env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
**使用 Docker Compose:**
|
||||
|
||||
```bash
|
||||
# 基础配置(无 CLI 工具)
|
||||
docker compose --profile base up -d
|
||||
|
||||
# CLI 配置(内置 Claude Code、Codex、OpenClaw)
|
||||
docker compose --profile cli up -d
|
||||
```
|
||||
|
||||
| 镜像 | 标签 | 大小 | 描述 |
|
||||
| ------------------------ | -------- | ------ | ---------- |
|
||||
| `diegosouzapw/omniroute` | `latest` | ~250MB | 最新稳定版 |
|
||||
| `diegosouzapw/omniroute` | `1.0.6` | ~250MB | 当前版本 |
|
||||
|
||||
---
|
||||
|
||||
## 💰 定价概览
|
||||
|
||||
| 层级 | 提供商 | 费用 | 配额重置 | 最适合 |
|
||||
| -------------- | ----------------- | --------------------- | --------------- | ------------ |
|
||||
| **💳 订阅** | Claude Code (Pro) | $20/月 | 5小时 + 每周 | 已订阅用户 |
|
||||
| | Codex (Plus/Pro) | $20-200/月 | 5小时 + 每周 | OpenAI 用户 |
|
||||
| | Gemini CLI | **免费** | 180K/月 + 1K/天 | 所有人! |
|
||||
| | GitHub Copilot | $10-19/月 | 每月 | GitHub 用户 |
|
||||
| **🔑 API KEY** | NVIDIA NIM | **免费**(1000 积分) | 一次性 | 免费测试 |
|
||||
| | DeepSeek | 按使用量 | 无 | 最佳性价比 |
|
||||
| | Groq | 免费层 + 付费 | 限速 | 超快推理 |
|
||||
| | xAI (Grok) | 按使用量 | 无 | Grok 模型 |
|
||||
| | Mistral | 免费层 + 付费 | 限速 | 欧洲 AI |
|
||||
| | OpenRouter | 按使用量 | 无 | 100+ 模型 |
|
||||
| **💰 低价** | GLM-4.7 | $0.6/1M | 每日 10时 | 经济备用 |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5小时滚动 | 最便宜选项 |
|
||||
| | Kimi K2 | $9/月固定 | 每月 10M Token | 可预测成本 |
|
||||
| **🆓 免费** | iFlow | $0 | 无限制 | 8 个免费模型 |
|
||||
| | Qwen | $0 | 无限制 | 3 个免费模型 |
|
||||
| | Kiro | $0 | 无限制 | 免费 Claude |
|
||||
|
||||
**💡 专业建议:** 从 Gemini CLI(每月 180K 免费)+ iFlow(无限免费)开始 = $0 成本!
|
||||
|
||||
---
|
||||
|
||||
## 🎯 使用场景
|
||||
|
||||
### 场景 1:"我有 Claude Pro 订阅"
|
||||
|
||||
**问题:** 配额未使用就过期,编程高峰期遇到速率限制
|
||||
|
||||
```
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (充分使用订阅)
|
||||
2. glm/glm-4.7 (配额用完时的便宜备用)
|
||||
3. if/kimi-k2-thinking (免费应急后备)
|
||||
|
||||
每月成本:$20(订阅)+ ~$5(备用)= $25 总计
|
||||
对比:$20 + 遇到限制 = 受挫
|
||||
```
|
||||
|
||||
### 场景 2:"我想要零成本"
|
||||
|
||||
**问题:** 无法承担订阅费用,需要可靠的 AI 编程
|
||||
|
||||
```
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash (每月 180K 免费)
|
||||
2. if/kimi-k2-thinking (无限免费)
|
||||
3. qw/qwen3-coder-plus (无限免费)
|
||||
|
||||
每月成本:$0
|
||||
质量:生产级模型
|
||||
```
|
||||
|
||||
### 场景 3:"我需要 24/7 编程,不中断"
|
||||
|
||||
**问题:** 截止日期紧迫,不能有停机时间
|
||||
|
||||
```
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6 (最佳质量)
|
||||
2. cx/gpt-5.2-codex (第二个订阅)
|
||||
3. glm/glm-4.7 (便宜,每日重置)
|
||||
4. minimax/MiniMax-M2.1 (最便宜,5小时重置)
|
||||
5. if/kimi-k2-thinking (免费无限制)
|
||||
|
||||
结果:5 层故障转移 = 零停机
|
||||
```
|
||||
|
||||
### 场景 4:"我想在 OpenClaw 中使用免费 AI"
|
||||
|
||||
**问题:** 需要在消息应用中使用 AI 助手,完全免费
|
||||
|
||||
```
|
||||
Combo: "openclaw-free"
|
||||
1. if/glm-4.7 (无限免费)
|
||||
2. if/minimax-m2.1 (无限免费)
|
||||
3. if/kimi-k2-thinking (无限免费)
|
||||
|
||||
每月成本:$0
|
||||
访问方式:WhatsApp、Telegram、Slack、Discord、iMessage、Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 💡 核心功能
|
||||
|
||||
### 🧠 路由与智能
|
||||
|
||||
| 功能 | 功能描述 |
|
||||
| ------------------------- | -------------------------------------------------------------------------- |
|
||||
| 🎯 **智能 4 层故障转移** | 自动路由:订阅 → API Key → 低价 → 免费 |
|
||||
| 📊 **实时配额追踪** | 实时 Token 计数 + 每个提供商的重置倒计时 |
|
||||
| 🔄 **格式转换** | OpenAI ↔ Claude ↔ Gemini ↔ Cursor ↔ Kiro 无缝切换 |
|
||||
| 👥 **多账号支持** | 每个提供商多个账号,智能选择 |
|
||||
| 🔄 **自动令牌刷新** | OAuth 令牌自动刷新并重试 |
|
||||
| 🎨 **自定义组合** | 6 种策略:fill-first、round-robin、p2c、random、least-used、cost-optimized |
|
||||
| 🧩 **自定义模型** | 为任何提供商添加任何模型 ID |
|
||||
| 🌐 **通配符路由** | 动态路由 `provider/*` 模式到任何提供商 |
|
||||
| 🧠 **推理预算** | passthrough、auto、custom 和 adaptive 模式用于推理模型 |
|
||||
| 💬 **System Prompt 注入** | 全局 System Prompt 应用于所有请求 |
|
||||
| 📄 **Responses API** | 完整支持 OpenAI Responses API (`/v1/responses`) 用于 Codex |
|
||||
|
||||
### 🎵 多模态 API
|
||||
|
||||
| 功能 | 功能描述 |
|
||||
| ----------------- | ---------------------------------------------- |
|
||||
| 🖼️ **图像生成** | `/v1/images/generations` — 4 个提供商,9+ 模型 |
|
||||
| 📐 **Embeddings** | `/v1/embeddings` — 6 个提供商,9+ 模型 |
|
||||
| 🎤 **音频转录** | `/v1/audio/transcriptions` — Whisper 兼容 |
|
||||
| 🔊 **文字转语音** | `/v1/audio/speech` — 多提供商音频合成 |
|
||||
| 🛡️ **内容审核** | `/v1/moderations` — 内容安全检查 |
|
||||
| 🔀 **重排序** | `/v1/rerank` — 文档相关性重排序 |
|
||||
|
||||
### 🛡️ 弹性与安全
|
||||
|
||||
| 功能 | 功能描述 |
|
||||
| --------------------- | -------------------------------------- |
|
||||
| 🔌 **断路器** | 每个提供商自动打开/关闭,可配置阈值 |
|
||||
| 🛡️ **反惊群** | Mutex + 信号量限速用于 API Key 提供商 |
|
||||
| 🧠 **语义缓存** | 两层缓存(签名 + 语义)降低成本和延迟 |
|
||||
| ⚡ **请求幂等性** | 5 秒去重窗口防止重复请求 |
|
||||
| 🔒 **TLS 指纹伪装** | 通过 wreq-js 绕过基于 TLS 的机器人检测 |
|
||||
| 🌐 **IP 过滤** | 白名单/黑名单用于 API 访问控制 |
|
||||
| 📊 **可编辑速率限制** | 可配置的 RPM、最小间隔和最大并发 |
|
||||
|
||||
### 📊 可观察性与分析
|
||||
|
||||
| 功能 | 功能描述 |
|
||||
| ------------------ | ------------------------------------------ |
|
||||
| 📝 **请求日志** | 调试模式,完整的请求/响应日志 |
|
||||
| 💾 **SQLite 日志** | 持久化代理日志,服务器重启后仍然保留 |
|
||||
| 📊 **分析仪表板** | Recharts:统计卡片、使用量图表、提供商表格 |
|
||||
| 📈 **进度追踪** | 流式传输的 SSE 进度事件(可选) |
|
||||
| 🧪 **LLM 评估** | 黄金集测试,4 种匹配策略 |
|
||||
| 🔍 **请求遥测** | p50/p95/p99 延迟聚合 + X-Request-Id 追踪 |
|
||||
| 📋 **日志 + 配额** | 专用页面用于日志浏览和配额追踪 |
|
||||
| 🏥 **健康仪表板** | 运行时间、断路器状态、锁定、缓存统计 |
|
||||
| 💰 **成本追踪** | 预算管理 + 每模型定价配置 |
|
||||
|
||||
### ☁️ 部署与同步
|
||||
|
||||
| 功能 | 功能描述 |
|
||||
| --------------------- | ---------------------------------------------------------- |
|
||||
| 💾 **Cloud Sync** | 通过 Cloudflare Workers 在设备间同步配置 |
|
||||
| 🌐 **随处部署** | Localhost、VPS、Docker、Cloudflare Workers |
|
||||
| 🔑 **API Key 管理** | 按提供商生成、轮换和设定 API Key 范围 |
|
||||
| 🧙 **配置向导** | 4 步引导式设置,面向新用户 |
|
||||
| 🔧 **CLI 工具仪表板** | 一键配置 Claude、Codex、Cline、OpenClaw、Kilo、Antigravity |
|
||||
| 🔄 **数据库备份** | 自动备份和恢复所有设置 |
|
||||
|
||||
<details>
|
||||
<summary><b>📖 功能详情</b></summary>
|
||||
|
||||
### 🎯 智能 4 层故障转移
|
||||
|
||||
创建带自动故障转移的组合:
|
||||
|
||||
```
|
||||
Combo: "my-coding-stack"
|
||||
1. cc/claude-opus-4-6 (您的订阅)
|
||||
2. nvidia/llama-3.3-70b (免费 NVIDIA API)
|
||||
3. glm/glm-4.7 (便宜备用,$0.6/1M)
|
||||
4. if/kimi-k2-thinking (免费后备)
|
||||
|
||||
→ 配额用完或出错时自动切换
|
||||
```
|
||||
|
||||
### 📊 实时配额追踪
|
||||
|
||||
- 每个提供商的 Token 消耗
|
||||
- 重置倒计时(5 小时、每日、每周)
|
||||
- 付费层级的成本估算
|
||||
- 月度支出报告
|
||||
|
||||
### 🔄 格式转换
|
||||
|
||||
格式间的无缝转换:
|
||||
|
||||
- **OpenAI** ↔ **Claude** ↔ **Gemini** ↔ **OpenAI Responses**
|
||||
- 您的 CLI 发送 OpenAI 格式 → OmniRoute 转换 → 提供商接收原生格式
|
||||
- 适用于任何支持自定义 OpenAI 端点的工具
|
||||
|
||||
### 👥 多账号支持
|
||||
|
||||
- 每个提供商添加多个账号
|
||||
- 自动轮询或基于优先级的路由
|
||||
- 当一个账号达到配额时自动切换到下一个
|
||||
|
||||
### 🔄 自动令牌刷新
|
||||
|
||||
- OAuth 令牌在过期前自动刷新
|
||||
- 无需手动重新认证
|
||||
- 所有提供商的无缝体验
|
||||
|
||||
### 🎨 自定义组合
|
||||
|
||||
- 创建无限模型组合
|
||||
- 6 种策略:fill-first、round-robin、power-of-two-choices、random、least-used、cost-optimized
|
||||
- 通过 Cloud Sync 在设备间共享组合
|
||||
|
||||
### 🏥 健康仪表板
|
||||
|
||||
- 系统状态(运行时间、版本、内存使用)
|
||||
- 每个提供商的断路器状态(Closed/Open/Half-Open)
|
||||
- 速率限制状态和活动锁定
|
||||
- 签名缓存统计
|
||||
- 延迟遥测(p50/p95/p99)+ 提示缓存
|
||||
- 一键重置健康状态
|
||||
|
||||
### 🔧 翻译器 Playground
|
||||
|
||||
- 调试、测试和可视化 API 格式转换
|
||||
- 发送请求并查看 OmniRoute 如何在提供商格式间转换
|
||||
- 对排查集成问题非常有价值
|
||||
|
||||
### 💾 Cloud Sync
|
||||
|
||||
- 在设备间同步提供商、组合和设置
|
||||
- 自动后台同步
|
||||
- 安全加密存储
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 📖 设置指南
|
||||
|
||||
<details>
|
||||
<summary><b>💳 订阅提供商</b></summary>
|
||||
|
||||
### Claude Code (Pro/Max)
|
||||
|
||||
```bash
|
||||
仪表板 → 提供商 → 连接 Claude Code
|
||||
→ OAuth 登录 → 自动令牌刷新
|
||||
→ 5 小时 + 每周配额追踪
|
||||
|
||||
模型:
|
||||
cc/claude-opus-4-6
|
||||
cc/claude-sonnet-4-5-20250929
|
||||
cc/claude-haiku-4-5-20251001
|
||||
```
|
||||
|
||||
**专业建议:** 复杂任务用 Opus,追求速度用 Sonnet。OmniRoute 按模型追踪配额!
|
||||
|
||||
### OpenAI Codex (Plus/Pro)
|
||||
|
||||
```bash
|
||||
仪表板 → 提供商 → 连接 Codex
|
||||
→ OAuth 登录(端口 1455)
|
||||
→ 5 小时 + 每周重置
|
||||
|
||||
模型:
|
||||
cx/gpt-5.2-codex
|
||||
cx/gpt-5.1-codex-max
|
||||
```
|
||||
|
||||
### Gemini CLI(免费 180K/月!)
|
||||
|
||||
```bash
|
||||
仪表板 → 提供商 → 连接 Gemini CLI
|
||||
→ Google OAuth
|
||||
→ 每月 180K completions + 每天 1K
|
||||
|
||||
模型:
|
||||
gc/gemini-3-flash-preview
|
||||
gc/gemini-2.5-pro
|
||||
```
|
||||
|
||||
**最佳价值:** 巨大的免费额度!在付费层级之前使用。
|
||||
|
||||
### GitHub Copilot
|
||||
|
||||
```bash
|
||||
仪表板 → 提供商 → 连接 GitHub
|
||||
→ 通过 GitHub OAuth
|
||||
→ 每月重置(每月 1 日)
|
||||
|
||||
模型:
|
||||
gh/gpt-5
|
||||
gh/claude-4.5-sonnet
|
||||
gh/gemini-3-pro
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔑 API Key 提供商</b></summary>
|
||||
|
||||
### NVIDIA NIM(免费 1000 积分!)
|
||||
|
||||
1. 注册:[build.nvidia.com](https://build.nvidia.com)
|
||||
2. 获取免费 API key(包含 1000 推理积分)
|
||||
3. 仪表板 → 添加提供商 → NVIDIA NIM:
|
||||
- API Key:`nvapi-your-key`
|
||||
|
||||
**模型:** `nvidia/llama-3.3-70b-instruct`、`nvidia/mistral-7b-instruct` 及 50+ 更多
|
||||
|
||||
**专业建议:** OpenAI 兼容的 API — 与 OmniRoute 的格式转换完美配合!
|
||||
|
||||
### DeepSeek
|
||||
|
||||
1. 注册:[platform.deepseek.com](https://platform.deepseek.com)
|
||||
2. 获取 API key
|
||||
3. 仪表板 → 添加提供商 → DeepSeek
|
||||
|
||||
**模型:** `deepseek/deepseek-chat`、`deepseek/deepseek-coder`
|
||||
|
||||
### Groq(免费层可用!)
|
||||
|
||||
1. 注册:[console.groq.com](https://console.groq.com)
|
||||
2. 获取 API key(包含免费层)
|
||||
3. 仪表板 → 添加提供商 → Groq
|
||||
|
||||
**模型:** `groq/llama-3.3-70b`、`groq/mixtral-8x7b`
|
||||
|
||||
**专业建议:** 超快推理 — 最适合实时编程!
|
||||
|
||||
### OpenRouter(100+ 模型)
|
||||
|
||||
1. 注册:[openrouter.ai](https://openrouter.ai)
|
||||
2. 获取 API key
|
||||
3. 仪表板 → 添加提供商 → OpenRouter
|
||||
|
||||
**模型:** 通过一个 API key 访问所有主要提供商的 100+ 模型。
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>💰 低价提供商(备用)</b></summary>
|
||||
|
||||
### GLM-4.7(每日重置,$0.6/1M)
|
||||
|
||||
1. 注册:[Zhipu AI](https://open.bigmodel.cn/)
|
||||
2. 从 Coding Plan 获取 API key
|
||||
3. 仪表板 → 添加 API Key:
|
||||
- 提供商:`glm`
|
||||
- API Key:`your-key`
|
||||
|
||||
**使用:** `glm/glm-4.7`
|
||||
|
||||
**专业建议:** Coding Plan 以 1/7 的价格提供 3 倍配额!每日 10:00 AM 重置。
|
||||
|
||||
### MiniMax M2.1(5 小时重置,$0.20/1M)
|
||||
|
||||
1. 注册:[MiniMax](https://www.minimax.io/)
|
||||
2. 获取 API key
|
||||
3. 仪表板 → 添加 API Key
|
||||
|
||||
**使用:** `minimax/MiniMax-M2.1`
|
||||
|
||||
**专业建议:** 长上下文(1M Token)最便宜的选项!
|
||||
|
||||
### Kimi K2($9/月固定)
|
||||
|
||||
1. 订阅:[Moonshot AI](https://platform.moonshot.ai/)
|
||||
2. 获取 API key
|
||||
3. 仪表板 → 添加 API Key
|
||||
|
||||
**使用:** `kimi/kimi-latest`
|
||||
|
||||
**专业建议:** 固定 $9/月 10M Token = $0.90/1M 有效成本!
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🆓 免费提供商(应急备用)</b></summary>
|
||||
|
||||
### iFlow(8 个免费模型)
|
||||
|
||||
```bash
|
||||
仪表板 → 连接 iFlow
|
||||
→ iFlow OAuth 登录
|
||||
→ 无限使用
|
||||
|
||||
模型:
|
||||
if/kimi-k2-thinking
|
||||
if/qwen3-coder-plus
|
||||
if/glm-4.7
|
||||
if/minimax-m2
|
||||
if/deepseek-r1
|
||||
```
|
||||
|
||||
### Qwen(3 个免费模型)
|
||||
|
||||
```bash
|
||||
仪表板 → 连接 Qwen
|
||||
→ 设备码授权
|
||||
→ 无限使用
|
||||
|
||||
模型:
|
||||
qw/qwen3-coder-plus
|
||||
qw/qwen3-coder-flash
|
||||
```
|
||||
|
||||
### Kiro(免费 Claude)
|
||||
|
||||
```bash
|
||||
仪表板 → 连接 Kiro
|
||||
→ AWS Builder ID 或 Google/GitHub
|
||||
→ 无限使用
|
||||
|
||||
模型:
|
||||
kr/claude-sonnet-4.5
|
||||
kr/claude-haiku-4.5
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🎨 创建组合</b></summary>
|
||||
|
||||
### 示例 1:最大化订阅 → 便宜备用
|
||||
|
||||
```
|
||||
仪表板 → 组合 → 创建新的
|
||||
|
||||
名称:premium-coding
|
||||
模型:
|
||||
1. cc/claude-opus-4-6(订阅主力)
|
||||
2. glm/glm-4.7(便宜备用,$0.6/1M)
|
||||
3. minimax/MiniMax-M2.1(最便宜的后备,$0.20/1M)
|
||||
|
||||
在 CLI 中使用:premium-coding
|
||||
```
|
||||
|
||||
### 示例 2:仅免费(零成本)
|
||||
|
||||
```
|
||||
名称:free-combo
|
||||
模型:
|
||||
1. gc/gemini-3-flash-preview(每月 180K 免费)
|
||||
2. if/kimi-k2-thinking(无限制)
|
||||
3. qw/qwen3-coder-plus(无限制)
|
||||
|
||||
成本:永远 $0!
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔧 CLI 集成</b></summary>
|
||||
|
||||
### Cursor IDE
|
||||
|
||||
```
|
||||
设置 → 模型 → 高级:
|
||||
OpenAI API Base URL: http://localhost:20128/v1
|
||||
OpenAI API Key: [从 OmniRoute 仪表板获取]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
使用仪表板中的 **CLI Tools** 页面一键配置,或手动编辑 `~/.claude/settings.json`。
|
||||
|
||||
### Codex CLI
|
||||
|
||||
```bash
|
||||
export OPENAI_BASE_URL="http://localhost:20128"
|
||||
export OPENAI_API_KEY="your-omniroute-api-key"
|
||||
|
||||
codex "your prompt"
|
||||
```
|
||||
|
||||
### OpenClaw
|
||||
|
||||
**选项 1 — 仪表板(推荐):**
|
||||
|
||||
```
|
||||
仪表板 → CLI Tools → OpenClaw → 选择模型 → 应用
|
||||
```
|
||||
|
||||
**选项 2 — 手动:** 编辑 `~/.openclaw/openclaw.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"models": {
|
||||
"providers": {
|
||||
"omniroute": {
|
||||
"baseUrl": "http://127.0.0.1:20128/v1",
|
||||
"apiKey": "sk_omniroute",
|
||||
"api": "openai-completions"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> **注意:** OpenClaw 仅支持本地 OmniRoute。使用 `127.0.0.1` 而非 `localhost` 以避免 IPv6 解析问题。
|
||||
|
||||
### Cline / Continue / RooCode
|
||||
|
||||
```
|
||||
设置 → API 配置:
|
||||
提供商:OpenAI Compatible
|
||||
Base URL: http://localhost:20128/v1
|
||||
API Key: [从 OmniRoute 仪表板获取]
|
||||
Model: if/kimi-k2-thinking
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 📊 可用模型
|
||||
|
||||
<details>
|
||||
<summary><b>查看所有可用模型</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** - Pro/Max:
|
||||
|
||||
- `cc/claude-opus-4-6`
|
||||
- `cc/claude-sonnet-4-5-20250929`
|
||||
- `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** - Plus/Pro:
|
||||
|
||||
- `cx/gpt-5.2-codex`
|
||||
- `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** - 免费:
|
||||
|
||||
- `gc/gemini-3-flash-preview`
|
||||
- `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)**:
|
||||
|
||||
- `gh/gpt-5`
|
||||
- `gh/claude-4.5-sonnet`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)** - 免费积分:
|
||||
|
||||
- `nvidia/llama-3.3-70b-instruct`
|
||||
- `nvidia/mistral-7b-instruct`
|
||||
- 50+ 更多模型在 [build.nvidia.com](https://build.nvidia.com)
|
||||
|
||||
**GLM (`glm/`)** - $0.6/1M:
|
||||
|
||||
- `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** - $0.2/1M:
|
||||
|
||||
- `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** - 免费:
|
||||
|
||||
- `if/kimi-k2-thinking`
|
||||
- `if/qwen3-coder-plus`
|
||||
- `if/deepseek-r1`
|
||||
- `if/glm-4.7`
|
||||
- `if/minimax-m2`
|
||||
|
||||
**Qwen (`qw/`)** - 免费:
|
||||
|
||||
- `qw/qwen3-coder-plus`
|
||||
- `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** - 免费:
|
||||
|
||||
- `kr/claude-sonnet-4.5`
|
||||
- `kr/claude-haiku-4.5`
|
||||
|
||||
**OpenRouter (`or/`)** - 100+ 模型:
|
||||
|
||||
- `or/anthropic/claude-4-sonnet`
|
||||
- `or/google/gemini-2.5-pro`
|
||||
- [openrouter.ai/models](https://openrouter.ai/models) 上的任何模型
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧪 评估 (Evals)
|
||||
|
||||
OmniRoute 包含内置评估框架,用于针对黄金集测试 LLM 响应质量。通过仪表板中的 **Analytics → Evals** 访问。
|
||||
|
||||
### 内置黄金集
|
||||
|
||||
预加载的「OmniRoute Golden Set」包含 10 个测试用例:
|
||||
|
||||
- 问候、数学、地理、代码生成
|
||||
- JSON 格式合规性、翻译、markdown
|
||||
- 安全拒绝(有害内容)、计数、布尔逻辑
|
||||
|
||||
### 评估策略
|
||||
|
||||
| 策略 | 描述 | 示例 |
|
||||
| ---------- | -------------------------------- | -------------------------------- |
|
||||
| `exact` | 输出必须完全匹配 | `"4"` |
|
||||
| `contains` | 输出必须包含子串(不区分大小写) | `"Paris"` |
|
||||
| `regex` | 输出必须匹配正则表达式模式 | `"1.*2.*3"` |
|
||||
| `custom` | 自定义 JS 函数返回 true/false | `(output) => output.length > 10` |
|
||||
|
||||
---
|
||||
|
||||
## 🐛 故障排除
|
||||
|
||||
<details>
|
||||
<summary><b>点击展开故障排除指南</b></summary>
|
||||
|
||||
**"Language model did not provide messages"**
|
||||
|
||||
- 提供商配额已耗尽 → 检查仪表板配额追踪器
|
||||
- 解决方案:使用组合故障转移或切换到更便宜的层级
|
||||
|
||||
**速率限制**
|
||||
|
||||
- 订阅配额耗尽 → 回退到 GLM/MiniMax
|
||||
- 添加组合:`cc/claude-opus-4-6 → glm/glm-4.7 → if/kimi-k2-thinking`
|
||||
|
||||
**OAuth 令牌过期**
|
||||
|
||||
- OmniRoute 自动刷新
|
||||
- 如果问题持续:仪表板 → 提供商 → 重新连接
|
||||
|
||||
**高成本**
|
||||
|
||||
- 在仪表板 → 成本中检查使用统计
|
||||
- 将主要模型切换为 GLM/MiniMax
|
||||
- 对非关键任务使用免费层(Gemini CLI、iFlow)
|
||||
|
||||
**仪表板在错误端口打开**
|
||||
|
||||
- 设置 `PORT=20128` 和 `NEXT_PUBLIC_BASE_URL=http://localhost:20128`
|
||||
|
||||
**Cloud sync 错误**
|
||||
|
||||
- 验证 `BASE_URL` 指向您正在运行的实例
|
||||
- 验证 `CLOUD_URL` 指向预期的云端点
|
||||
- 保持 `NEXT_PUBLIC_*` 值与服务器端值一致
|
||||
|
||||
**首次登录不工作**
|
||||
|
||||
- 检查 `.env` 中的 `INITIAL_PASSWORD`
|
||||
- 如未设置,默认密码为 `123456`
|
||||
|
||||
**没有请求日志**
|
||||
|
||||
- 在 `.env` 中设置 `ENABLE_REQUEST_LOGS=true`
|
||||
|
||||
**兼容 OpenAI 的提供商连接测试显示 "Invalid"**
|
||||
|
||||
- 许多提供商不暴露 `/models` 端点
|
||||
- OmniRoute v1.0.6+ 包含通过 chat completions 的回退验证
|
||||
- 确保 base URL 包含 `/v1` 后缀
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ 技术栈
|
||||
|
||||
- **运行时**: Node.js 20+
|
||||
- **语言**: TypeScript 5.9 — `src/` 和 `open-sse/` 中 **100% TypeScript**(v1.0.6)
|
||||
- **框架**: Next.js 16 + React 19 + Tailwind CSS 4
|
||||
- **数据库**: LowDB (JSON) + SQLite(领域状态 + 代理日志)
|
||||
- **流式传输**: Server-Sent Events (SSE)
|
||||
- **认证**: OAuth 2.0 (PKCE) + JWT + API Keys
|
||||
- **测试**: Node.js test runner(368+ 单元测试)
|
||||
- **CI/CD**: GitHub Actions(发布时自动 npm 发布 + Docker Hub)
|
||||
- **网站**: [omniroute.online](https://omniroute.online)
|
||||
- **包**: [npmjs.com/package/omniroute](https://www.npmjs.com/package/omniroute)
|
||||
- **Docker**: [hub.docker.com/r/diegosouzapw/omniroute](https://hub.docker.com/r/diegosouzapw/omniroute)
|
||||
- **弹性**: 断路器、指数退避、反惊群、TLS 伪装
|
||||
|
||||
---
|
||||
|
||||
## 📖 文档
|
||||
|
||||
| 文档 | 描述 |
|
||||
| ----------------------------------- | ---------------------------- |
|
||||
| [用户指南](docs/USER_GUIDE.md) | 提供商、组合、CLI 集成、部署 |
|
||||
| [API 参考](docs/API_REFERENCE.md) | 所有端点及示例 |
|
||||
| [故障排除](docs/TROUBLESHOOTING.md) | 常见问题和解决方案 |
|
||||
| [架构](docs/ARCHITECTURE.md) | 系统架构和内部机制 |
|
||||
| [贡献指南](CONTRIBUTING.md) | 开发设置和指南 |
|
||||
| [OpenAPI 规范](docs/openapi.yaml) | OpenAPI 3.0 规范 |
|
||||
| [安全策略](SECURITY.md) | 漏洞报告和安全实践 |
|
||||
|
||||
---
|
||||
|
||||
## 📧 支持
|
||||
|
||||
> 💬 **加入我们的社区!** [WhatsApp 群组](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t) — 获取帮助、分享技巧、了解最新动态。
|
||||
|
||||
- **网站**: [omniroute.online](https://omniroute.online)
|
||||
- **GitHub**: [github.com/diegosouzapw/OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
||||
- **Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **WhatsApp**: [社区群组](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
- **原始项目**: [decolua 的 9router](https://github.com/decolua/9router)
|
||||
|
||||
---
|
||||
|
||||
## 👥 贡献者
|
||||
|
||||
[](https://github.com/diegosouzapw/OmniRoute/graphs/contributors)
|
||||
|
||||
### 如何贡献
|
||||
|
||||
1. Fork 仓库
|
||||
2. 创建功能分支(`git checkout -b feature/amazing-feature`)
|
||||
3. 提交更改(`git commit -m 'Add amazing feature'`)
|
||||
4. 推送到分支(`git push origin feature/amazing-feature`)
|
||||
5. 打开 Pull Request
|
||||
|
||||
详细指南请参阅 [CONTRIBUTING.md](CONTRIBUTING.md)。
|
||||
|
||||
### 发布新版本
|
||||
|
||||
```bash
|
||||
# 创建发布 — npm 发布自动完成
|
||||
gh release create v1.0.6 --title "v1.0.6" --generate-notes
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 Star 历史
|
||||
|
||||
<a href="https://star-history.com/#diegosouzapw/OmniRoute&Date">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=diegosouzapw/OmniRoute&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
---
|
||||
|
||||
## 🙏 致谢
|
||||
|
||||
特别感谢 **[decolua](https://github.com/decolua)** 的 **[9router](https://github.com/decolua/9router)** — 启发了本 fork 的原始项目。OmniRoute 在这个令人难以置信的基础上添加了额外功能、多模态 API 和完整的 TypeScript 重写。
|
||||
|
||||
特别感谢 **[CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI)** — 启发了本 JavaScript 移植的原始 Go 实现。
|
||||
|
||||
---
|
||||
|
||||
## 📄 许可证
|
||||
|
||||
MIT 许可证 — 详见 [LICENSE](LICENSE)。
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<sub>用 ❤️ 为 24/7 编程的开发者打造</sub>
|
||||
<br/>
|
||||
<sub><a href="https://omniroute.online">omniroute.online</a></sub>
|
||||
</div>
|
||||
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
const ROOT = join(__dirname, "..");
|
||||
|
||||
function resolveMcpEntry(rootDir = ROOT) {
|
||||
const candidates = [
|
||||
// Preferred distributable JS entry (npm publish artifact)
|
||||
join(rootDir, "app", "open-sse", "mcp-server", "server.js"),
|
||||
// Local workspace TypeScript source fallback
|
||||
join(rootDir, "open-sse", "mcp-server", "server.ts"),
|
||||
];
|
||||
|
||||
for (const entry of candidates) {
|
||||
if (existsSync(entry)) return entry;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function formatSpawnError(exitCode, signal) {
|
||||
if (signal) return `MCP server exited by signal ${signal}`;
|
||||
return `MCP server exited with code ${exitCode ?? 1}`;
|
||||
}
|
||||
|
||||
export async function startMcpCli(rootDir = ROOT) {
|
||||
const mcpEntry = resolveMcpEntry(rootDir);
|
||||
if (!mcpEntry) {
|
||||
throw new Error(
|
||||
"MCP server entrypoint not found. Expected app/open-sse/mcp-server/server.js or open-sse/mcp-server/server.ts."
|
||||
);
|
||||
}
|
||||
|
||||
// `tsx` loader is only required for local `.ts` fallback; JS entry works without it.
|
||||
const loaderArgs = mcpEntry.endsWith(".ts") ? ["--import", "tsx/esm"] : [];
|
||||
|
||||
await new Promise((resolve, reject) => {
|
||||
const child = spawn(process.execPath, [...loaderArgs, mcpEntry], {
|
||||
cwd: rootDir,
|
||||
env: process.env,
|
||||
stdio: "inherit",
|
||||
});
|
||||
|
||||
child.once("error", reject);
|
||||
child.once("exit", (code, signal) => {
|
||||
if ((code ?? 0) === 0 && !signal) {
|
||||
resolve(undefined);
|
||||
return;
|
||||
}
|
||||
reject(new Error(formatSpawnError(code, signal)));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
|
||||
startMcpCli().catch((err) => {
|
||||
console.error("\x1b[31m✖ Failed to start MCP server:\x1b[0m", err?.message || err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
+143
-21
@@ -7,20 +7,76 @@
|
||||
* omniroute Start the server (default port 20128)
|
||||
* omniroute --port 3000 Start on custom port
|
||||
* omniroute --no-open Start without opening browser
|
||||
* omniroute --mcp Start MCP server (stdio transport for IDEs)
|
||||
* omniroute --help Show help
|
||||
* omniroute --version Show version
|
||||
*/
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { homedir, platform } from "node:os";
|
||||
import { isNativeBinaryCompatible } from "../scripts/native-binary-compat.mjs";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
const ROOT = join(__dirname, "..");
|
||||
const APP_DIR = join(ROOT, "app");
|
||||
|
||||
// ── Load .env file (for global npm install) ─────────────────
|
||||
function loadEnvFile() {
|
||||
const envPaths = [];
|
||||
|
||||
// 1. DATA_DIR/.env if set
|
||||
if (process.env.DATA_DIR) {
|
||||
envPaths.push(join(process.env.DATA_DIR, ".env"));
|
||||
}
|
||||
|
||||
// 2. ~/.omniroute/.env (default data dir)
|
||||
const home = homedir();
|
||||
if (home) {
|
||||
if (platform() === "win32") {
|
||||
const appData = process.env.APPDATA || join(home, "AppData", "Roaming");
|
||||
envPaths.push(join(appData, "omniroute", ".env"));
|
||||
} else {
|
||||
envPaths.push(join(home, ".omniroute", ".env"));
|
||||
}
|
||||
}
|
||||
|
||||
// 3. ./.env (current working directory)
|
||||
envPaths.push(join(process.cwd(), ".env"));
|
||||
|
||||
for (const envPath of envPaths) {
|
||||
try {
|
||||
if (existsSync(envPath)) {
|
||||
const content = readFileSync(envPath, "utf-8");
|
||||
for (const line of content.split("\n")) {
|
||||
const trimmed = line.trim();
|
||||
// Skip empty lines and comments
|
||||
if (!trimmed || trimmed.startsWith("#")) continue;
|
||||
const eqIdx = trimmed.indexOf("=");
|
||||
if (eqIdx > 0) {
|
||||
const key = trimmed.slice(0, eqIdx).trim();
|
||||
const value = trimmed.slice(eqIdx + 1).trim();
|
||||
// Don't override existing env vars
|
||||
if (process.env[key] === undefined) {
|
||||
// Remove surrounding quotes
|
||||
process.env[key] = value.replace(/^["']|["']$/g, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
console.log(` \x1b[2m📋 Loaded env from ${envPath}\x1b[0m`);
|
||||
return;
|
||||
}
|
||||
} catch {
|
||||
// Ignore errors reading env files
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
loadEnvFile();
|
||||
|
||||
// ── Parse args ─────────────────────────────────────────────
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
@@ -30,18 +86,30 @@ if (args.includes("--help") || args.includes("-h")) {
|
||||
|
||||
\x1b[1mUsage:\x1b[0m
|
||||
omniroute Start the server
|
||||
omniroute --port <port> Use custom port (default: 20128)
|
||||
omniroute --port <port> Use custom API port (default: 20128)
|
||||
omniroute --no-open Don't open browser automatically
|
||||
omniroute --mcp Start MCP server (stdio transport for IDEs)
|
||||
omniroute --help Show this help
|
||||
omniroute --version Show version
|
||||
|
||||
\x1b[1mMCP Integration:\x1b[0m
|
||||
The --mcp flag starts an MCP server over stdio, exposing OmniRoute
|
||||
tools for AI agents in VS Code, Cursor, Claude Desktop, and Copilot.
|
||||
|
||||
Available tools: omniroute_get_health, omniroute_list_combos,
|
||||
omniroute_check_quota, omniroute_route_request, and more.
|
||||
|
||||
\x1b[1mConfig:\x1b[0m
|
||||
Loads .env from: ~/.omniroute/.env or ./.env
|
||||
Memory limit: OMNIROUTE_MEMORY_MB (default: 512)
|
||||
|
||||
\x1b[1mAfter starting:\x1b[0m
|
||||
Dashboard: http://localhost:<port>
|
||||
API: http://localhost:<port>/v1
|
||||
Dashboard: http://localhost:<dashboard-port>
|
||||
API: http://localhost:<api-port>/v1
|
||||
|
||||
\x1b[1mConnect your tools:\x1b[0m
|
||||
Set your CLI tool (Cursor, Cline, Codex, etc.) to use:
|
||||
\x1b[33mhttp://localhost:20128/v1\x1b[0m
|
||||
\x1b[33mhttp://localhost:<api-port>/v1\x1b[0m
|
||||
`);
|
||||
process.exit(0);
|
||||
}
|
||||
@@ -58,27 +126,48 @@ if (args.includes("--version") || args.includes("-v")) {
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
// Parse --port
|
||||
let port = 20128;
|
||||
// ── MCP Server Mode ───────────────────────────────────────
|
||||
if (args.includes("--mcp")) {
|
||||
try {
|
||||
const { startMcpCli } = await import(join(ROOT, "bin", "mcp-server.mjs"));
|
||||
await startMcpCli(ROOT);
|
||||
} catch (err) {
|
||||
console.error("\x1b[31m✖ Failed to start MCP server:\x1b[0m", err.message || err);
|
||||
process.exit(1);
|
||||
}
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
function parsePort(value, fallback) {
|
||||
const parsed = parseInt(String(value), 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 && parsed <= 65535 ? parsed : fallback;
|
||||
}
|
||||
|
||||
// Parse --port (canonical/base port)
|
||||
let port = parsePort(process.env.PORT || "20128", 20128);
|
||||
const portIdx = args.indexOf("--port");
|
||||
if (portIdx !== -1 && args[portIdx + 1]) {
|
||||
port = parseInt(args[portIdx + 1], 10);
|
||||
if (isNaN(port)) {
|
||||
const cliPort = parsePort(args[portIdx + 1], null);
|
||||
if (cliPort === null) {
|
||||
console.error("\x1b[31m✖ Invalid port number\x1b[0m");
|
||||
process.exit(1);
|
||||
}
|
||||
port = cliPort;
|
||||
}
|
||||
|
||||
const apiPort = parsePort(process.env.API_PORT || String(port), port);
|
||||
const dashboardPort = parsePort(process.env.DASHBOARD_PORT || String(port), port);
|
||||
|
||||
const noOpen = args.includes("--no-open");
|
||||
|
||||
// ── Banner ─────────────────────────────────────────────────
|
||||
console.log(`
|
||||
\x1b[36m ____ _ ____ _
|
||||
/ __ \\ (_) __ \\ | |
|
||||
| | | |_ __ ___ _ __ _| |__) |___ _ _| |_ ___
|
||||
| | | | '_ \` _ \\| '_ \\ | _ // _ \\| | | | __/ _ \\
|
||||
| |__| | | | | | | | | | | | \\ \\ (_) | |_| | || __/
|
||||
\\____/|_| |_| |_|_| |_|_|_| \\_\\___/ \\__,_|\\__\\___|
|
||||
/ __ \\ (_) __ \\ | |
|
||||
| | | |_ __ ___ _ __ _| |__) |___ _ _| |_ ___
|
||||
| | | | '_ \` _ \\| '_ \\ | _ // _ \\| | | | __/ _ \\
|
||||
| |__| | | | | | | | | | | | \\ \\ (_) | |_| | || __/
|
||||
\\____/|_| |_| |_|_| |_|_|_| \\_\\___/ \\__,_|\\__\\___|
|
||||
\x1b[0m`);
|
||||
|
||||
// ── Node.js version check ──────────────────────────────────
|
||||
@@ -105,17 +194,49 @@ if (!existsSync(serverJs)) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// ── Pre-flight: verify better-sqlite3 native binary ───────
|
||||
// Verify the binary's actual target platform/arch before trusting dlopen.
|
||||
// This avoids the macOS false positive where a bundled linux-x64 addon can
|
||||
// appear to load even though the runtime will fail when better-sqlite3 starts.
|
||||
const sqliteBinary = join(
|
||||
APP_DIR,
|
||||
"node_modules",
|
||||
"better-sqlite3",
|
||||
"build",
|
||||
"Release",
|
||||
"better_sqlite3.node"
|
||||
);
|
||||
if (existsSync(sqliteBinary) && !isNativeBinaryCompatible(sqliteBinary)) {
|
||||
console.error(
|
||||
"\x1b[31m✖ better-sqlite3 native module is incompatible with this platform.\x1b[0m"
|
||||
);
|
||||
console.error(` Run: cd ${APP_DIR} && npm rebuild better-sqlite3`);
|
||||
if (platform() === "darwin") {
|
||||
console.error(" If build tools are missing: xcode-select --install");
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// ── Start server ───────────────────────────────────────────
|
||||
console.log(` \x1b[2m⏳ Starting server...\x1b[0m\n`);
|
||||
|
||||
// Sanitize memory limit — parseInt to prevent command injection (#150)
|
||||
const rawMemory = parseInt(process.env.OMNIROUTE_MEMORY_MB || "512", 10);
|
||||
const memoryLimit =
|
||||
Number.isFinite(rawMemory) && rawMemory >= 64 && rawMemory <= 16384 ? rawMemory : 512;
|
||||
|
||||
const env = {
|
||||
...process.env,
|
||||
PORT: String(port),
|
||||
OMNIROUTE_PORT: String(port),
|
||||
PORT: String(dashboardPort),
|
||||
DASHBOARD_PORT: String(dashboardPort),
|
||||
API_PORT: String(apiPort),
|
||||
HOSTNAME: "0.0.0.0",
|
||||
NODE_ENV: "production",
|
||||
NODE_OPTIONS: `--max-old-space-size=${memoryLimit}`,
|
||||
};
|
||||
|
||||
const server = spawn("node", [serverJs], {
|
||||
const server = spawn("node", [`--max-old-space-size=${memoryLimit}`, serverJs], {
|
||||
cwd: APP_DIR,
|
||||
env,
|
||||
stdio: "pipe",
|
||||
@@ -168,16 +289,17 @@ process.on("SIGTERM", shutdown);
|
||||
|
||||
// ── On ready ───────────────────────────────────────────────
|
||||
async function onReady() {
|
||||
const url = `http://localhost:${port}`;
|
||||
const dashboardUrl = `http://localhost:${dashboardPort}`;
|
||||
const apiUrl = `http://localhost:${apiPort}`;
|
||||
|
||||
console.log(`
|
||||
\x1b[32m✔ OmniRoute is running!\x1b[0m
|
||||
|
||||
\x1b[1m Dashboard:\x1b[0m ${url}
|
||||
\x1b[1m API Base:\x1b[0m ${url}/v1
|
||||
\x1b[1m Dashboard:\x1b[0m ${dashboardUrl}
|
||||
\x1b[1m API Base:\x1b[0m ${apiUrl}/v1
|
||||
|
||||
\x1b[2m Point your CLI tool (Cursor, Cline, Codex) to:\x1b[0m
|
||||
\x1b[33m ${url}/v1\x1b[0m
|
||||
\x1b[33m ${apiUrl}/v1\x1b[0m
|
||||
|
||||
\x1b[2m Press Ctrl+C to stop\x1b[0m
|
||||
`);
|
||||
@@ -185,7 +307,7 @@ async function onReady() {
|
||||
if (!noOpen) {
|
||||
try {
|
||||
const open = await import("open");
|
||||
await open.default(url);
|
||||
await open.default(dashboardUrl);
|
||||
} catch {
|
||||
// open is optional — if not available, just skip
|
||||
}
|
||||
|
||||
Submodule clipr/9router deleted from bc91be7305
Submodule clipr/CLIProxyAPI deleted from 068630dbd0
@@ -22,21 +22,19 @@ services:
|
||||
env_file: .env
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- PORT=20128
|
||||
- PORT=${PORT:-20128}
|
||||
- DASHBOARD_PORT=${DASHBOARD_PORT:-${PORT:-20128}}
|
||||
- API_PORT=${API_PORT:-20129}
|
||||
- API_HOST=${API_HOST:-0.0.0.0}
|
||||
- HOSTNAME=0.0.0.0
|
||||
- DATA_DIR=/app/data
|
||||
ports:
|
||||
- "20130:20128"
|
||||
- "${PROD_DASHBOARD_PORT:-20130}:${DASHBOARD_PORT:-${PORT:-20128}}"
|
||||
- "${PROD_API_PORT:-20131}:${API_PORT:-20129}"
|
||||
volumes:
|
||||
- omniroute-prod-data:/app/data
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD",
|
||||
"node",
|
||||
"-e",
|
||||
"fetch('http://127.0.0.1:20128/api/settings').then(r=>{if(!r.ok)throw r.status}).catch(()=>process.exit(1))",
|
||||
]
|
||||
test: ["CMD", "node", "healthcheck.mjs"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
+16
-10
@@ -20,16 +20,14 @@ x-common: &common
|
||||
env_file: .env
|
||||
environment:
|
||||
- DATA_DIR=/app/data # Must match the volume mount below
|
||||
- PORT=${PORT:-20128}
|
||||
- DASHBOARD_PORT=${DASHBOARD_PORT:-${PORT:-20128}}
|
||||
- API_PORT=${API_PORT:-20129}
|
||||
- API_HOST=${API_HOST:-0.0.0.0}
|
||||
volumes:
|
||||
- omniroute-data:/app/data
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD",
|
||||
"node",
|
||||
"-e",
|
||||
"fetch('http://127.0.0.1:20128/api/settings').then(r=>{if(!r.ok)throw r.status}).catch(()=>process.exit(1))",
|
||||
]
|
||||
test: ["CMD", "node", "healthcheck.mjs"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
@@ -45,7 +43,8 @@ services:
|
||||
target: runner-base
|
||||
image: omniroute:base
|
||||
ports:
|
||||
- "${PORT:-20128}:20128"
|
||||
- "${DASHBOARD_PORT:-${PORT:-20128}}:${DASHBOARD_PORT:-${PORT:-20128}}"
|
||||
- "${API_PORT:-20129}:${API_PORT:-20129}"
|
||||
profiles:
|
||||
- base
|
||||
|
||||
@@ -58,7 +57,8 @@ services:
|
||||
target: runner-cli
|
||||
image: omniroute:cli
|
||||
ports:
|
||||
- "${PORT:-20128}:20128"
|
||||
- "${DASHBOARD_PORT:-${PORT:-20128}}:${DASHBOARD_PORT:-${PORT:-20128}}"
|
||||
- "${API_PORT:-20129}:${API_PORT:-20129}"
|
||||
profiles:
|
||||
- cli
|
||||
|
||||
@@ -71,8 +71,14 @@ services:
|
||||
target: runner-base
|
||||
image: omniroute:base
|
||||
ports:
|
||||
- "${PORT:-20128}:20128"
|
||||
- "${DASHBOARD_PORT:-${PORT:-20128}}:${DASHBOARD_PORT:-${PORT:-20128}}"
|
||||
- "${API_PORT:-20129}:${API_PORT:-20129}"
|
||||
environment:
|
||||
- DATA_DIR=/app/data
|
||||
- PORT=${PORT:-20128}
|
||||
- DASHBOARD_PORT=${DASHBOARD_PORT:-${PORT:-20128}}
|
||||
- API_PORT=${API_PORT:-20129}
|
||||
- API_HOST=${API_HOST:-0.0.0.0}
|
||||
- CLI_MODE=host
|
||||
- CLI_EXTRA_PATHS=/host-local/bin:/host-node/bin
|
||||
- CLI_CONFIG_HOME=/host-home
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
# OmniRoute A2A Server Documentation
|
||||
|
||||
> Agent-to-Agent Protocol v0.3 — OmniRoute as an intelligent routing agent
|
||||
|
||||
## Agent Discovery
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/.well-known/agent.json
|
||||
```
|
||||
|
||||
Returns the Agent Card describing OmniRoute's capabilities, skills, and authentication requirements.
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
All `/a2a` requests require an API key via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer YOUR_OMNIROUTE_API_KEY
|
||||
```
|
||||
|
||||
If no API key is configured on the server, authentication is bypassed.
|
||||
|
||||
---
|
||||
|
||||
## JSON-RPC 2.0 Methods
|
||||
|
||||
### `message/send` — Synchronous Execution
|
||||
|
||||
Sends a message to a skill and waits for the complete response.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Write a hello world in Python"}],
|
||||
"metadata": {"model": "auto", "combo": "fast-coding"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"result": {
|
||||
"task": { "id": "uuid", "state": "completed" },
|
||||
"artifacts": [{ "type": "text", "content": "..." }],
|
||||
"metadata": {
|
||||
"routing_explanation": "Selected claude-sonnet via provider \"anthropic\" (latency: 1200ms, cost: $0.003)",
|
||||
"cost_envelope": { "estimated": 0.005, "actual": 0.003, "currency": "USD" },
|
||||
"resilience_trace": [
|
||||
{ "event": "primary_selected", "provider": "anthropic", "timestamp": "..." }
|
||||
],
|
||||
"policy_verdict": { "allowed": true, "reason": "within budget and quota limits" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `message/stream` — SSE Streaming
|
||||
|
||||
Same as `message/send` but returns Server-Sent Events for real-time streaming.
|
||||
|
||||
```bash
|
||||
curl -N -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/stream",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Explain quantum computing"}]
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**SSE Events:**
|
||||
|
||||
```
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"working"},"chunk":{"type":"text","content":"..."}}}
|
||||
|
||||
: heartbeat 2026-03-03T17:00:00Z
|
||||
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"completed"},"metadata":{...}}}
|
||||
```
|
||||
|
||||
### `tasks/get` — Query Task Status
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"tasks/get","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
### `tasks/cancel` — Cancel a Task
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"3","method":"tasks/cancel","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Skills
|
||||
|
||||
| Skill | Description |
|
||||
| :----------------- | :------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `smart-routing` | Routes prompts through OmniRoute's intelligent pipeline. Returns response with routing explanation, cost, and resilience trace. |
|
||||
| `quota-management` | Answers natural-language queries about provider quotas, suggests free combos, and provides quota rankings. |
|
||||
|
||||
---
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
```
|
||||
submitted → working → completed
|
||||
→ failed
|
||||
→ cancelled
|
||||
```
|
||||
|
||||
- Tasks expire after 5 minutes (configurable)
|
||||
- Terminal states: `completed`, `failed`, `cancelled`
|
||||
- Event log tracks every state transition
|
||||
|
||||
---
|
||||
|
||||
## Error Codes
|
||||
|
||||
| Code | Meaning |
|
||||
| :----- | :----------------------------- |
|
||||
| -32700 | Parse error (invalid JSON) |
|
||||
| -32600 | Invalid request / Unauthorized |
|
||||
| -32601 | Method or skill not found |
|
||||
| -32602 | Invalid params |
|
||||
| -32603 | Internal error |
|
||||
|
||||
---
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### Python (requests)
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
resp = requests.post("http://localhost:20128/a2a", json={
|
||||
"jsonrpc": "2.0", "id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}
|
||||
}, headers={"Authorization": "Bearer YOUR_KEY"})
|
||||
|
||||
result = resp.json()["result"]
|
||||
print(result["artifacts"][0]["content"])
|
||||
print(result["metadata"]["routing_explanation"])
|
||||
```
|
||||
|
||||
### TypeScript (fetch)
|
||||
|
||||
```typescript
|
||||
const resp = await fetch("http://localhost:20128/a2a", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: "Bearer YOUR_KEY",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: "1",
|
||||
method: "message/send",
|
||||
params: {
|
||||
skill: "smart-routing",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { result } = await resp.json();
|
||||
console.log(result.metadata.routing_explanation);
|
||||
```
|
||||
@@ -1,5 +1,7 @@
|
||||
# API Reference
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](API_REFERENCE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/API_REFERENCE.md) | 🇪🇸 [Español](i18n/es/API_REFERENCE.md) | 🇫🇷 [Français](i18n/fr/API_REFERENCE.md) | 🇮🇹 [Italiano](i18n/it/API_REFERENCE.md) | 🇷🇺 [Русский](i18n/ru/API_REFERENCE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/API_REFERENCE.md) | 🇩🇪 [Deutsch](i18n/de/API_REFERENCE.md) | 🇮🇳 [हिन्दी](i18n/in/API_REFERENCE.md) | 🇹🇭 [ไทย](i18n/th/API_REFERENCE.md) | 🇺🇦 [Українська](i18n/uk-UA/API_REFERENCE.md) | 🇸🇦 [العربية](i18n/ar/API_REFERENCE.md) | 🇯🇵 [日本語](i18n/ja/API_REFERENCE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/API_REFERENCE.md) | 🇧🇬 [Български](i18n/bg/API_REFERENCE.md) | 🇩🇰 [Dansk](i18n/da/API_REFERENCE.md) | 🇫🇮 [Suomi](i18n/fi/API_REFERENCE.md) | 🇮🇱 [עברית](i18n/he/API_REFERENCE.md) | 🇭🇺 [Magyar](i18n/hu/API_REFERENCE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/API_REFERENCE.md) | 🇰🇷 [한국어](i18n/ko/API_REFERENCE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/API_REFERENCE.md) | 🇳🇱 [Nederlands](i18n/nl/API_REFERENCE.md) | 🇳🇴 [Norsk](i18n/no/API_REFERENCE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/API_REFERENCE.md) | 🇷🇴 [Română](i18n/ro/API_REFERENCE.md) | 🇵🇱 [Polski](i18n/pl/API_REFERENCE.md) | 🇸🇰 [Slovenčina](i18n/sk/API_REFERENCE.md) | 🇸🇪 [Svenska](i18n/sv/API_REFERENCE.md) | 🇵🇭 [Filipino](i18n/phi/API_REFERENCE.md)
|
||||
|
||||
Complete reference for all OmniRoute API endpoints.
|
||||
|
||||
---
|
||||
@@ -258,6 +260,16 @@ Response example:
|
||||
|
||||
CLI responses include: `installed`, `runnable`, `command`, `commandPath`, `runtimeMode`, `reason`.
|
||||
|
||||
### ACP Agents
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------- | ------ | -------------------------------------------------------- |
|
||||
| `/api/acp/agents` | GET | List all detected agents (built-in + custom) with status |
|
||||
| `/api/acp/agents` | POST | Add custom agent or refresh detection cache |
|
||||
| `/api/acp/agents` | DELETE | Remove a custom agent by `id` query param |
|
||||
|
||||
GET response includes `agents[]` (id, name, binary, version, installed, protocol, isCustom) and `summary` (total, installed, notFound, builtIn, custom).
|
||||
|
||||
### Resilience & Rate Limits
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|
||||
+33
-29
@@ -1,6 +1,8 @@
|
||||
# OmniRoute Architecture
|
||||
|
||||
_Last updated: 2026-02-18_
|
||||
🌐 **Languages:** 🇺🇸 [English](ARCHITECTURE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/ARCHITECTURE.md) | 🇪🇸 [Español](i18n/es/ARCHITECTURE.md) | 🇫🇷 [Français](i18n/fr/ARCHITECTURE.md) | 🇮🇹 [Italiano](i18n/it/ARCHITECTURE.md) | 🇷🇺 [Русский](i18n/ru/ARCHITECTURE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/ARCHITECTURE.md) | 🇩🇪 [Deutsch](i18n/de/ARCHITECTURE.md) | 🇮🇳 [हिन्दी](i18n/in/ARCHITECTURE.md) | 🇹🇭 [ไทย](i18n/th/ARCHITECTURE.md) | 🇺🇦 [Українська](i18n/uk-UA/ARCHITECTURE.md) | 🇸🇦 [العربية](i18n/ar/ARCHITECTURE.md) | 🇯🇵 [日本語](i18n/ja/ARCHITECTURE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/ARCHITECTURE.md) | 🇧🇬 [Български](i18n/bg/ARCHITECTURE.md) | 🇩🇰 [Dansk](i18n/da/ARCHITECTURE.md) | 🇫🇮 [Suomi](i18n/fi/ARCHITECTURE.md) | 🇮🇱 [עברית](i18n/he/ARCHITECTURE.md) | 🇭🇺 [Magyar](i18n/hu/ARCHITECTURE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/ARCHITECTURE.md) | 🇰🇷 [한국어](i18n/ko/ARCHITECTURE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/ARCHITECTURE.md) | 🇳🇱 [Nederlands](i18n/nl/ARCHITECTURE.md) | 🇳🇴 [Norsk](i18n/no/ARCHITECTURE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/ARCHITECTURE.md) | 🇷🇴 [Română](i18n/ro/ARCHITECTURE.md) | 🇵🇱 [Polski](i18n/pl/ARCHITECTURE.md) | 🇸🇰 [Slovenčina](i18n/sk/ARCHITECTURE.md) | 🇸🇪 [Svenska](i18n/sv/ARCHITECTURE.md) | 🇵🇭 [Filipino](i18n/phi/ARCHITECTURE.md)
|
||||
|
||||
_Last updated: 2026-03-04_
|
||||
|
||||
## Executive Summary
|
||||
|
||||
@@ -79,8 +81,8 @@ flowchart LR
|
||||
API[V1 Compatibility API\n/v1/*]
|
||||
DASH[Dashboard + Management API\n/api/*]
|
||||
CORE[SSE + Translation Core\nopen-sse + src/sse]
|
||||
DB[(db.json)]
|
||||
UDB[(usage.json + log.txt)]
|
||||
DB[(storage.sqlite)]
|
||||
UDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph Upstreams[Upstream Providers]
|
||||
@@ -142,7 +144,7 @@ Management domains:
|
||||
- Providers/connections: `src/app/api/providers*`
|
||||
- Provider nodes: `src/app/api/provider-nodes*`
|
||||
- Custom models: `src/app/api/provider-models` (GET/POST/DELETE)
|
||||
- Model catalog: `src/app/api/models/catalog` (GET)
|
||||
- Model catalog: `src/app/api/models/route.ts` (GET)
|
||||
- Proxy config: `src/app/api/settings/proxy` (GET/PUT/DELETE) + `src/app/api/settings/proxy/test` (POST)
|
||||
- OAuth: `src/app/api/oauth/*`
|
||||
- Keys/aliases/combos/pricing: `src/app/api/keys*`, `src/app/api/models/alias`, `src/app/api/combos*`, `src/app/api/pricing`
|
||||
@@ -223,18 +225,19 @@ OAuth provider modules (12 individual files under `src/lib/oauth/providers/`):
|
||||
|
||||
## 3) Persistence Layer
|
||||
|
||||
Primary state DB:
|
||||
Primary state DB (SQLite):
|
||||
|
||||
- `src/lib/localDb.ts`
|
||||
- file: `${DATA_DIR}/db.json` (or `$XDG_CONFIG_HOME/omniroute/db.json` when set, else `~/.omniroute/db.json`)
|
||||
- entities: providerConnections, providerNodes, modelAliases, combos, apiKeys, settings, pricing, **customModels**, **proxyConfig**, **ipFilter**, **thinkingBudget**, **systemPrompt**
|
||||
- Core infra: `src/lib/db/core.ts` (better-sqlite3, migrations, WAL)
|
||||
- Re-export facade: `src/lib/localDb.ts` (thin compatibility layer for callers)
|
||||
- file: `${DATA_DIR}/storage.sqlite` (or `$XDG_CONFIG_HOME/omniroute/storage.sqlite` when set, else `~/.omniroute/storage.sqlite`)
|
||||
- entities (tables + KV namespaces): providerConnections, providerNodes, modelAliases, combos, apiKeys, settings, pricing, **customModels**, **proxyConfig**, **ipFilter**, **thinkingBudget**, **systemPrompt**
|
||||
|
||||
Usage DB:
|
||||
Usage persistence:
|
||||
|
||||
- `src/lib/usageDb.ts`
|
||||
- files: `${DATA_DIR}/usage.json`, `${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`
|
||||
- follows same base directory policy as `localDb` (`DATA_DIR`, then `XDG_CONFIG_HOME/omniroute` when set)
|
||||
- decomposed into focused sub-modules: `migrations.ts`, `usageHistory.ts`, `costCalculator.ts`, `usageStats.ts`, `callLogs.ts`
|
||||
- facade: `src/lib/usageDb.ts` (decomposed modules in `src/lib/usage/*`)
|
||||
- SQLite tables in `storage.sqlite`: `usage_history`, `call_logs`, `proxy_logs`
|
||||
- optional file artifacts remain for compatibility/debug (`${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`, `<repo>/logs/...`)
|
||||
- legacy JSON files are migrated to SQLite by startup migrations when present
|
||||
|
||||
Domain State DB (SQLite):
|
||||
|
||||
@@ -503,9 +506,9 @@ erDiagram
|
||||
|
||||
Physical storage files:
|
||||
|
||||
- main state: `${DATA_DIR}/db.json` (or `$XDG_CONFIG_HOME/omniroute/db.json` when set, else `~/.omniroute/db.json`)
|
||||
- usage stats: `${DATA_DIR}/usage.json`
|
||||
- request log lines: `${DATA_DIR}/log.txt`
|
||||
- primary runtime DB: `${DATA_DIR}/storage.sqlite`
|
||||
- request log lines: `${DATA_DIR}/log.txt` (compat/debug artifact)
|
||||
- structured call payload archives: `${DATA_DIR}/call_logs/`
|
||||
- optional translator/request debug sessions: `<repo>/logs/...`
|
||||
|
||||
## Deployment Topology
|
||||
@@ -520,8 +523,8 @@ flowchart LR
|
||||
subgraph ContainerOrProcess[OmniRoute Runtime]
|
||||
Next[Next.js Server\nPORT=20128]
|
||||
Core[SSE Core + Executors]
|
||||
MainDB[(db.json)]
|
||||
UsageDB[(usage.json/log.txt)]
|
||||
MainDB[(storage.sqlite)]
|
||||
UsageDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph External[External Services]
|
||||
@@ -548,7 +551,7 @@ flowchart LR
|
||||
- `src/app/api/providers*`: provider CRUD, validation, testing
|
||||
- `src/app/api/provider-nodes*`: custom compatible node management
|
||||
- `src/app/api/provider-models`: custom model management (CRUD)
|
||||
- `src/app/api/models/catalog`: full model catalog API (all types grouped by provider)
|
||||
- `src/app/api/models/route.ts`: model catalog API (aliases + custom models)
|
||||
- `src/app/api/oauth/*`: OAuth/device-code flows
|
||||
- `src/app/api/keys*`: local API key lifecycle
|
||||
- `src/app/api/models/alias`: alias management
|
||||
@@ -580,8 +583,9 @@ flowchart LR
|
||||
|
||||
### Persistence
|
||||
|
||||
- `src/lib/localDb.ts`: persistent config/state
|
||||
- `src/lib/usageDb.ts`: usage history and rolling request logs
|
||||
- `src/lib/db/*`: persistent config/state and domain persistence on SQLite
|
||||
- `src/lib/localDb.ts`: compatibility re-export for DB modules
|
||||
- `src/lib/usageDb.ts`: usage history/call logs facade on top of SQLite tables
|
||||
|
||||
## Provider Executor Coverage (Strategy Pattern)
|
||||
|
||||
@@ -722,23 +726,23 @@ Files are written to `<repo>/logs/<session>/` for each request session.
|
||||
|
||||
## 5) Data Integrity
|
||||
|
||||
- DB shape migration/repair for missing keys
|
||||
- corrupt JSON reset safeguards for localDb and usageDb
|
||||
- SQLite schema migrations and auto-upgrade hooks at startup
|
||||
- legacy JSON → SQLite migration compatibility path
|
||||
|
||||
## Observability and Operational Signals
|
||||
|
||||
Runtime visibility sources:
|
||||
|
||||
- console logs from `src/sse/utils/logger.ts`
|
||||
- per-request usage aggregates in `usage.json`
|
||||
- textual request status log in `log.txt`
|
||||
- per-request usage aggregates in SQLite (`usage_history`, `call_logs`, `proxy_logs`)
|
||||
- textual request status log in `log.txt` (optional/compat)
|
||||
- optional deep request/translation logs under `logs/` when `ENABLE_REQUEST_LOGS=true`
|
||||
- dashboard usage endpoints (`/api/usage/*`) for UI consumption
|
||||
|
||||
## Security-Sensitive Boundaries
|
||||
|
||||
- JWT secret (`JWT_SECRET`) secures dashboard session cookie verification/signing
|
||||
- Initial password fallback (`INITIAL_PASSWORD`, default `123456`) must be overridden in real deployments
|
||||
- Initial password bootstrap (`INITIAL_PASSWORD`) should be explicitly configured for first-run provisioning
|
||||
- API key HMAC secret (`API_KEY_SECRET`) secures generated local API key format
|
||||
- Provider secrets (API keys/tokens) are persisted in local DB and should be protected at filesystem level
|
||||
- Cloud sync endpoints rely on API key auth + machine id semantics
|
||||
@@ -760,13 +764,13 @@ Environment variables actively used by code:
|
||||
|
||||
## Known Architectural Notes
|
||||
|
||||
1. `usageDb` and `localDb` now share the same base directory policy (`DATA_DIR` -> `XDG_CONFIG_HOME/omniroute` -> `~/.omniroute`) with legacy file migration.
|
||||
2. `/api/v1/route.ts` returns a static model list and is not the main models source used by `/v1/models`.
|
||||
1. `usageDb` and `localDb` share the same base directory policy (`DATA_DIR` -> `XDG_CONFIG_HOME/omniroute` -> `~/.omniroute`) with legacy file migration.
|
||||
2. `/api/v1/route.ts` delegates to the same unified catalog builder used by `/api/v1/models` (`src/app/api/v1/models/catalog.ts`) to avoid semantic drift.
|
||||
3. Request logger writes full headers/body when enabled; treat log directory as sensitive.
|
||||
4. Cloud behavior depends on correct `NEXT_PUBLIC_BASE_URL` and cloud endpoint reachability.
|
||||
5. The `open-sse/` directory is published as the `@omniroute/open-sse` **npm workspace package**. Source code imports it via `@omniroute/open-sse/...` (resolved by Next.js `transpilePackages`). File paths in this document still use the directory name `open-sse/` for consistency.
|
||||
6. Charts in the dashboard use **Recharts** (SVG-based) for accessible, interactive analytics visualizations (model usage bar charts, provider breakdown tables with success rates).
|
||||
7. E2E tests use **Playwright** (`tests/e2e/`), run via `npm run test:e2e`. Unit tests use **Node.js test runner** (`tests/unit/`), run via `npm run test:plan3`. Source code under `src/` is **TypeScript** (`.ts`/`.tsx`); the `open-sse/` workspace remains JavaScript (`.js`).
|
||||
7. E2E tests use **Playwright** (`tests/e2e/`), run via `npm run test:e2e`. Unit tests use **Node.js test runner** (`tests/unit/`), run via `npm run test:unit`. Source code under `src/` is **TypeScript** (`.ts`/`.tsx`); the `open-sse/` workspace remains JavaScript (`.js`).
|
||||
8. Settings page is organized into 5 tabs: Security, Routing (6 global strategies: fill-first, round-robin, p2c, random, least-used, cost-optimized), Resilience (editable rate limits, circuit breaker, policies), AI (thinking budget, system prompt, prompt cache), Advanced (proxy).
|
||||
|
||||
## Operational Verification Checklist
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
# OmniRoute Auto-Combo Engine
|
||||
|
||||
> Self-managing model chains with adaptive scoring
|
||||
|
||||
## How It Works
|
||||
|
||||
The Auto-Combo Engine dynamically selects the best provider/model for each request using a **6-factor scoring function**:
|
||||
|
||||
| Factor | Weight | Description |
|
||||
| :--------- | :----- | :---------------------------------------------- |
|
||||
| Quota | 0.20 | Remaining capacity [0..1] |
|
||||
| Health | 0.25 | Circuit breaker: CLOSED=1.0, HALF=0.5, OPEN=0.0 |
|
||||
| CostInv | 0.20 | Inverse cost (cheaper = higher score) |
|
||||
| LatencyInv | 0.15 | Inverse p95 latency (faster = higher) |
|
||||
| TaskFit | 0.10 | Model × task type fitness score |
|
||||
| Stability | 0.10 | Low variance in latency/errors |
|
||||
|
||||
## Mode Packs
|
||||
|
||||
| Pack | Focus | Key Weight |
|
||||
| :---------------------- | :----------- | :--------------- |
|
||||
| 🚀 **Ship Fast** | Speed | latencyInv: 0.35 |
|
||||
| 💰 **Cost Saver** | Economy | costInv: 0.40 |
|
||||
| 🎯 **Quality First** | Best model | taskFit: 0.40 |
|
||||
| 📡 **Offline Friendly** | Availability | quota: 0.40 |
|
||||
|
||||
## Self-Healing
|
||||
|
||||
- **Temporary exclusion**: Score < 0.2 → excluded for 5 min (progressive backoff, max 30 min)
|
||||
- **Circuit breaker awareness**: OPEN → auto-excluded; HALF_OPEN → probe requests
|
||||
- **Incident mode**: >50% OPEN → disable exploration, maximize stability
|
||||
- **Cooldown recovery**: After exclusion, first request is a "probe" with reduced timeout
|
||||
|
||||
## Bandit Exploration
|
||||
|
||||
5% of requests (configurable) are routed to random providers for exploration. Disabled in incident mode.
|
||||
|
||||
## API
|
||||
|
||||
```bash
|
||||
# Create auto-combo
|
||||
curl -X POST http://localhost:20128/api/combos/auto \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"id":"my-auto","name":"Auto Coder","candidatePool":["anthropic","google","openai"],"modePack":"ship-fast"}'
|
||||
|
||||
# List auto-combos
|
||||
curl http://localhost:20128/api/combos/auto
|
||||
```
|
||||
|
||||
## Task Fitness
|
||||
|
||||
30+ models scored across 6 task types (`coding`, `review`, `planning`, `analysis`, `debugging`, `documentation`). Supports wildcard patterns (e.g., `*-coder` → high coding score).
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------ |
|
||||
| `open-sse/services/autoCombo/scoring.ts` | Scoring function & pool normalization |
|
||||
| `open-sse/services/autoCombo/taskFitness.ts` | Model × task fitness lookup |
|
||||
| `open-sse/services/autoCombo/engine.ts` | Selection logic, bandit, budget cap |
|
||||
| `open-sse/services/autoCombo/selfHealing.ts` | Exclusion, probes, incident mode |
|
||||
| `open-sse/services/autoCombo/modePacks.ts` | 4 weight profiles |
|
||||
| `src/app/api/combos/auto/route.ts` | REST API |
|
||||
@@ -1,5 +1,7 @@
|
||||
# omniroute — Codebase Documentation
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](CODEBASE_DOCUMENTATION.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/CODEBASE_DOCUMENTATION.md) | 🇪🇸 [Español](i18n/es/CODEBASE_DOCUMENTATION.md) | 🇫🇷 [Français](i18n/fr/CODEBASE_DOCUMENTATION.md) | 🇮🇹 [Italiano](i18n/it/CODEBASE_DOCUMENTATION.md) | 🇷🇺 [Русский](i18n/ru/CODEBASE_DOCUMENTATION.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/CODEBASE_DOCUMENTATION.md) | 🇩🇪 [Deutsch](i18n/de/CODEBASE_DOCUMENTATION.md) | 🇮🇳 [हिन्दी](i18n/in/CODEBASE_DOCUMENTATION.md) | 🇹🇭 [ไทย](i18n/th/CODEBASE_DOCUMENTATION.md) | 🇺🇦 [Українська](i18n/uk-UA/CODEBASE_DOCUMENTATION.md) | 🇸🇦 [العربية](i18n/ar/CODEBASE_DOCUMENTATION.md) | 🇯🇵 [日本語](i18n/ja/CODEBASE_DOCUMENTATION.md) | 🇻🇳 [Tiếng Việt](i18n/vi/CODEBASE_DOCUMENTATION.md) | 🇧🇬 [Български](i18n/bg/CODEBASE_DOCUMENTATION.md) | 🇩🇰 [Dansk](i18n/da/CODEBASE_DOCUMENTATION.md) | 🇫🇮 [Suomi](i18n/fi/CODEBASE_DOCUMENTATION.md) | 🇮🇱 [עברית](i18n/he/CODEBASE_DOCUMENTATION.md) | 🇭🇺 [Magyar](i18n/hu/CODEBASE_DOCUMENTATION.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/CODEBASE_DOCUMENTATION.md) | 🇰🇷 [한국어](i18n/ko/CODEBASE_DOCUMENTATION.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/CODEBASE_DOCUMENTATION.md) | 🇳🇱 [Nederlands](i18n/nl/CODEBASE_DOCUMENTATION.md) | 🇳🇴 [Norsk](i18n/no/CODEBASE_DOCUMENTATION.md) | 🇵🇹 [Português (Portugal)](i18n/pt/CODEBASE_DOCUMENTATION.md) | 🇷🇴 [Română](i18n/ro/CODEBASE_DOCUMENTATION.md) | 🇵🇱 [Polski](i18n/pl/CODEBASE_DOCUMENTATION.md) | 🇸🇰 [Slovenčina](i18n/sk/CODEBASE_DOCUMENTATION.md) | 🇸🇪 [Svenska](i18n/sv/CODEBASE_DOCUMENTATION.md) | 🇵🇭 [Filipino](i18n/phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
> A comprehensive, beginner-friendly guide to the **omniroute** multi-provider AI proxy router.
|
||||
|
||||
---
|
||||
|
||||
+71
-4
@@ -1,5 +1,7 @@
|
||||
# OmniRoute — Dashboard Features Gallery
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](FEATURES.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/FEATURES.md) | 🇪🇸 [Español](i18n/es/FEATURES.md) | 🇫🇷 [Français](i18n/fr/FEATURES.md) | 🇮🇹 [Italiano](i18n/it/FEATURES.md) | 🇷🇺 [Русский](i18n/ru/FEATURES.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/FEATURES.md) | 🇩🇪 [Deutsch](i18n/de/FEATURES.md) | 🇮🇳 [हिन्दी](i18n/in/FEATURES.md) | 🇹🇭 [ไทย](i18n/th/FEATURES.md) | 🇺🇦 [Українська](i18n/uk-UA/FEATURES.md) | 🇸🇦 [العربية](i18n/ar/FEATURES.md) | 🇯🇵 [日本語](i18n/ja/FEATURES.md) | 🇻🇳 [Tiếng Việt](i18n/vi/FEATURES.md) | 🇧🇬 [Български](i18n/bg/FEATURES.md) | 🇩🇰 [Dansk](i18n/da/FEATURES.md) | 🇫🇮 [Suomi](i18n/fi/FEATURES.md) | 🇮🇱 [עברית](i18n/he/FEATURES.md) | 🇭🇺 [Magyar](i18n/hu/FEATURES.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/FEATURES.md) | 🇰🇷 [한국어](i18n/ko/FEATURES.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/FEATURES.md) | 🇳🇱 [Nederlands](i18n/nl/FEATURES.md) | 🇳🇴 [Norsk](i18n/no/FEATURES.md) | 🇵🇹 [Português (Portugal)](i18n/pt/FEATURES.md) | 🇷🇴 [Română](i18n/ro/FEATURES.md) | 🇵🇱 [Polski](i18n/pl/FEATURES.md) | 🇸🇰 [Slovenčina](i18n/sk/FEATURES.md) | 🇸🇪 [Svenska](i18n/sv/FEATURES.md) | 🇵🇭 [Filipino](i18n/phi/FEATURES.md)
|
||||
|
||||
Visual guide to every section of the OmniRoute dashboard.
|
||||
|
||||
---
|
||||
@@ -14,7 +16,7 @@ Manage AI provider connections: OAuth providers (Claude Code, Codex, Gemini CLI)
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
Create model routing combos with 6 strategies: fill-first, round-robin, power-of-two-choices, random, least-used, and cost-optimized. Each combo chains multiple models with automatic fallback.
|
||||
Create model routing combos with 6 strategies: priority, weighted, round-robin, random, least-used, and cost-optimized. Each combo chains multiple models with automatic fallback and includes quick templates and readiness checks.
|
||||
|
||||

|
||||
|
||||
@@ -44,9 +46,28 @@ Four modes for debugging API translations: **Playground** (format converter), **
|
||||
|
||||
---
|
||||
|
||||
## 🎮 Model Playground _(v2.0.9+)_
|
||||
|
||||
Test any model directly from the dashboard. Select provider, model, and endpoint, write prompts with Monaco Editor, stream responses in real-time, abort mid-stream, and view timing metrics.
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Themes _(v2.0.5+)_
|
||||
|
||||
Customizable color themes for the entire dashboard. Choose from 7 preset colors (Coral, Blue, Red, Green, Violet, Orange, Cyan) or create a custom theme by picking any hex color. Supports light, dark, and system mode.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Settings
|
||||
|
||||
General settings, system storage, backup management (export/import database), appearance (dark/light mode), security (includes API endpoint protection and custom provider blocking), routing, resilience, and advanced configuration.
|
||||
Comprehensive settings panel with tabs:
|
||||
|
||||
- **General** — System storage, backup management (export/import database)
|
||||
- **Appearance** — Theme selector (dark/light/system), color theme presets and custom colors, health log visibility
|
||||
- **Security** — API endpoint protection, custom provider blocking, IP filtering, session info
|
||||
- **Routing** — Model aliases, background task degradation
|
||||
- **Resilience** — Rate limit persistence, circuit breaker tuning
|
||||
- **Advanced** — Configuration overrides
|
||||
|
||||

|
||||
|
||||
@@ -54,12 +75,29 @@ General settings, system storage, backup management (export/import database), ap
|
||||
|
||||
## 🔧 CLI Tools
|
||||
|
||||
One-click configuration for AI coding tools: Claude Code, Codex CLI, Gemini CLI, OpenClaw, Kilo Code, and Antigravity.
|
||||
One-click configuration for AI coding tools: Claude Code, Codex CLI, Gemini CLI, OpenClaw, Kilo Code, Antigravity, Cline, Continue, Cursor, and Factory Droid. Features automated config apply/reset, connection profiles, and model mapping.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🤖 CLI Agents _(v2.0.11+)_
|
||||
|
||||
Dashboard for discovering and managing CLI agents. Shows a grid of 14 built-in agents (Codex, Claude, Goose, Gemini CLI, OpenClaw, Aider, OpenCode, Cline, Qwen Code, ForgeCode, Amazon Q, Open Interpreter, Cursor CLI, Warp) with:
|
||||
|
||||
- **Installation status** — Installed / Not Found with version detection
|
||||
- **Protocol badges** — stdio, HTTP, etc.
|
||||
- **Custom agents** — Register any CLI tool via form (name, binary, version command, spawn args)
|
||||
- **CLI Fingerprint Matching** — Per-provider toggle to match native CLI request signatures, reducing ban risk while preserving proxy IP
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Media _(v2.0.3+)_
|
||||
|
||||
Generate images, videos, and music from the dashboard. Supports OpenAI, xAI, Together, Hyperbolic, SD WebUI, ComfyUI, AnimateDiff, Stable Audio Open, and MusicGen.
|
||||
|
||||
---
|
||||
|
||||
## 📝 Request Logs
|
||||
|
||||
Real-time request logging with filtering by provider, model, account, and API key. Shows status codes, token usage, latency, and response details.
|
||||
@@ -70,6 +108,35 @@ Real-time request logging with filtering by provider, model, account, and API ke
|
||||
|
||||
## 🌐 API Endpoint
|
||||
|
||||
Your unified API endpoint with capability breakdown: Chat Completions, Embeddings, Image Generation, Reranking, Audio Transcription, and registered API keys.
|
||||
Your unified API endpoint with capability breakdown: Chat Completions, Responses API, Embeddings, Image Generation, Reranking, Audio Transcription, Text-to-Speech, Moderations, and registered API keys. Cloud proxy support for remote access.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔑 API Key Management
|
||||
|
||||
Create, scope, and revoke API keys. Each key can be restricted to specific models/providers with full access or read-only permissions. Visual key management with usage tracking.
|
||||
|
||||
---
|
||||
|
||||
## 📋 Audit Log
|
||||
|
||||
Administrative action tracking with filtering by action type, actor, target, IP address, and timestamp. Full security event history.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application
|
||||
|
||||
Native Electron desktop app for Windows, macOS, and Linux. Run OmniRoute as a standalone application with system tray integration, offline support, auto-update, and one-click install.
|
||||
|
||||
Key features:
|
||||
|
||||
- Server readiness polling (no blank screen on cold start)
|
||||
- System tray with port management
|
||||
- Content Security Policy
|
||||
- Single-instance lock
|
||||
- Auto-update on restart
|
||||
- Platform-conditional UI (macOS traffic lights, Windows/Linux default titlebar)
|
||||
|
||||
📖 See [`electron/README.md`](../electron/README.md) for full documentation.
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
# OmniRoute MCP Server Documentation
|
||||
|
||||
> Model Context Protocol server with 16 intelligent tools
|
||||
|
||||
## Installation
|
||||
|
||||
OmniRoute MCP is built-in. Start it with:
|
||||
|
||||
```bash
|
||||
omniroute --mcp
|
||||
```
|
||||
|
||||
Or via the open-sse transport:
|
||||
|
||||
```bash
|
||||
# HTTP streamable transport (port 20130)
|
||||
omniroute --dev # MCP auto-starts on /mcp endpoint
|
||||
```
|
||||
|
||||
## IDE Configuration
|
||||
|
||||
See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot, and Claude Desktop setup.
|
||||
|
||||
---
|
||||
|
||||
## Essential Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :------------------------------ | :--------------------------------------- |
|
||||
| `omniroute_get_health` | Gateway health, circuit breakers, uptime |
|
||||
| `omniroute_list_combos` | All configured combos with models |
|
||||
| `omniroute_get_combo_metrics` | Performance metrics for a specific combo |
|
||||
| `omniroute_switch_combo` | Switch active combo by ID/name |
|
||||
| `omniroute_check_quota` | Quota status per provider or all |
|
||||
| `omniroute_route_request` | Send a chat completion through OmniRoute |
|
||||
| `omniroute_cost_report` | Cost analytics for a time period |
|
||||
| `omniroute_list_models_catalog` | Full model catalog with capabilities |
|
||||
|
||||
## Advanced Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :--------------------------------- | :---------------------------------------------- |
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
| `omniroute_get_session_snapshot` | Full session state: costs, tokens, errors |
|
||||
|
||||
## Authentication
|
||||
|
||||
MCP tools are authenticated via API key scopes. Each tool requires specific scopes:
|
||||
|
||||
| Scope | Tools |
|
||||
| :------------- | :----------------------------------------------- |
|
||||
| `read:health` | get_health, get_provider_metrics |
|
||||
| `read:combos` | list_combos, get_combo_metrics |
|
||||
| `write:combos` | switch_combo |
|
||||
| `read:quota` | check_quota |
|
||||
| `write:route` | route_request, simulate_route, test_combo |
|
||||
| `read:usage` | cost_report, get_session_snapshot, explain_route |
|
||||
| `write:config` | set_budget_guard, set_resilience_profile |
|
||||
| `read:models` | list_models_catalog, best_combo_for_task |
|
||||
|
||||
## Audit Logging
|
||||
|
||||
Every tool call is logged to `mcp_tool_audit` with:
|
||||
|
||||
- Tool name, arguments, result
|
||||
- Duration (ms), success/failure
|
||||
- API key hash, timestamp
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------------ |
|
||||
| `open-sse/mcp-server/server.ts` | MCP server creation + 16 tool registrations |
|
||||
| `open-sse/mcp-server/transport.ts` | Stdio + HTTP transport |
|
||||
| `open-sse/mcp-server/auth.ts` | API key + scope validation |
|
||||
| `open-sse/mcp-server/audit.ts` | Tool call audit logging |
|
||||
| `open-sse/mcp-server/tools/advancedTools.ts` | 8 advanced tool handlers |
|
||||
@@ -0,0 +1,33 @@
|
||||
# Release Checklist
|
||||
|
||||
Use this checklist before tagging or publishing a new OmniRoute release.
|
||||
|
||||
## Version and Changelog
|
||||
|
||||
1. Bump `package.json` version (`x.y.z`) in the release branch.
|
||||
2. Move release notes from `## [Unreleased]` in `CHANGELOG.md` to a dated section:
|
||||
- `## [x.y.z] — YYYY-MM-DD`
|
||||
3. Keep `## [Unreleased]` as the first changelog section for upcoming work.
|
||||
4. Ensure the latest semver section in `CHANGELOG.md` equals `package.json` version.
|
||||
|
||||
## API Docs
|
||||
|
||||
1. Update `docs/openapi.yaml`:
|
||||
- `info.version` must equal `package.json` version.
|
||||
2. Validate endpoint examples if API contracts changed.
|
||||
|
||||
## Runtime Docs
|
||||
|
||||
1. Review `docs/ARCHITECTURE.md` for storage/runtime drift.
|
||||
2. Review `docs/TROUBLESHOOTING.md` for env var and operational drift.
|
||||
3. Update localized docs if source docs changed significantly.
|
||||
|
||||
## Automated Check
|
||||
|
||||
Run the sync guard locally before opening PR:
|
||||
|
||||
```bash
|
||||
npm run check:docs-sync
|
||||
```
|
||||
|
||||
CI also runs this check in `.github/workflows/ci.yml` (lint job).
|
||||
-113
@@ -1,113 +0,0 @@
|
||||
# Rate Limiting & Flow Control Overhaul — Tasks
|
||||
|
||||
> Referência: [Relatório de Análise](../walkthrough.md) · Fase docs em `/docs/phases/`
|
||||
|
||||
---
|
||||
|
||||
## Fase 1 — Error Classification & Provider Profiles
|
||||
|
||||
### Backend Core
|
||||
|
||||
- [x] `constants.ts` — Substituir `COOLDOWN_MS.transient` por `transientInitial` (5s) + `transientMax` (60s)
|
||||
- [x] `constants.ts` — Adicionar `PROVIDER_PROFILES` (oauth / apikey) com cooldowns diferenciados
|
||||
- [x] `constants.ts` — Adicionar `DEFAULT_API_LIMITS` (100 RPM, 200ms minTime)
|
||||
- [x] `providerRegistry.ts` — Criar helper `getProviderCategory(providerId)` → `"oauth"` | `"apikey"`
|
||||
- [x] `accountFallback.ts` — Aceitar `provider` como parâmetro em `checkFallbackError`
|
||||
- [x] `accountFallback.ts` — Implementar backoff exponencial para 502/503/504 transientes
|
||||
- [x] `accountFallback.ts` — Calcular cooldown baseado no perfil do provedor
|
||||
- [x] `accountFallback.ts` — Adicionar helper `getProviderProfile(provider)`
|
||||
|
||||
### Callers (propagar `provider`)
|
||||
|
||||
- [x] `auth.ts` → `markAccountUnavailable` — Passar `provider` para `checkFallbackError`
|
||||
- [x] `combo.ts` → `handleComboChat` / `handleRoundRobinCombo` — Passar `provider` nos erros
|
||||
|
||||
### Testes
|
||||
|
||||
- [x] Atualizar `rate-limit-enhanced.test.mjs` — Teste "transient errors don't increase backoff" → `newBackoffLevel = 1`
|
||||
- [x] Criar `error-classification.test.mjs` — Cooldown exponencial 502, perfis OAuth/API, helper `getProviderCategory`
|
||||
|
||||
---
|
||||
|
||||
## Fase 2 — Circuit Breaker no Combo Pipeline
|
||||
|
||||
### Backend
|
||||
|
||||
- [x] `combo.ts` — Importar `getCircuitBreaker` e `CircuitBreakerOpenError`
|
||||
- [x] `combo.ts` — `handleComboChat` — Verificar `breaker.canExecute()` antes de cada modelo
|
||||
- [x] `combo.ts` — `handleRoundRobinCombo` — Integrar breaker per-model
|
||||
- [x] `combo.ts` — Marcar `semaphore.markRateLimited` para 502/503/504 (não só 429)
|
||||
- [x] `combo.ts` — Implementar early exit quando todos os modelos têm breaker OPEN
|
||||
|
||||
### Testes
|
||||
|
||||
- [x] Criar `combo-circuit-breaker.test.mjs` — Combo skip breaker OPEN, early exit, semáforo 502
|
||||
|
||||
---
|
||||
|
||||
## Fase 3 — Anti-Thundering Herd & Auto Rate Limit
|
||||
|
||||
### Backend
|
||||
|
||||
- [x] `rateLimitManager.ts` — Auto-enable para `apikey` providers com limites elevados
|
||||
- [x] `rateLimitManager.ts` — Criar limiter com defaults (100 RPM) quando não configurado
|
||||
- [x] `auth.ts` — Adicionar mutex na `markAccountUnavailable` para evitar marcação paralela
|
||||
|
||||
### Testes
|
||||
|
||||
- [x] Criar `thundering-herd.test.mjs` — Mutex, auto-enable, limites não restritivos
|
||||
|
||||
---
|
||||
|
||||
## Fase 4 — Frontend Resilience UI
|
||||
|
||||
### Settings Page
|
||||
|
||||
- [x] `settings/page.tsx` — Adicionar tab "Resilience" (icon: `health_and_safety`) entre Routing e Pricing
|
||||
|
||||
### Novos Componentes
|
||||
|
||||
- [x] Criar `ResilienceTab.tsx` — Layout com 4 cards (Provider Profiles → Rate Limiting → Circuit Breakers → Policies)
|
||||
- [x] Criar `ProviderProfilesCard.tsx` — Toggle OAuth/API Key, inputs para cooldowns
|
||||
- [x] Criar `CircuitBreakerCard.tsx` — Status real-time per-provider, auto-refresh 5s, botão reset
|
||||
- [x] Criar `RateLimitOverviewCard.tsx` — Tabela providers × accounts × cooldown — **agora editável com RPM, Min Gap, Max Concurrent**
|
||||
|
||||
### API Routes
|
||||
|
||||
- [x] Criar `api/resilience/route.ts` — GET (estado completo + defaults mesclados) + PATCH (salvar perfis + defaults)
|
||||
- [x] Criar `api/resilience/reset/route.ts` — POST (resetar breakers + cooldowns)
|
||||
|
||||
### Migração
|
||||
|
||||
- [x] `PoliciesPanel.tsx` movido de Security para Resilience tab
|
||||
|
||||
---
|
||||
|
||||
## Fase 5 — Settings Page Restructure (v0.9.0)
|
||||
|
||||
### Tab Reorganization
|
||||
|
||||
- [x] **Security** — Simplificado para Login/Password + IP Access Control
|
||||
- [x] **Routing** — Expandido para 6 estratégias globais com descrições
|
||||
- [x] **Resilience** — Reordenado: Provider Profiles → Rate Limiting (editável) → Circuit Breakers → Policies
|
||||
- [x] **AI** — Thinking Budget + System Prompt + Prompt Cache (movido do Advanced)
|
||||
- [x] **Advanced** — Simplificado para apenas Global Proxy
|
||||
|
||||
### Backend Routing Strategies
|
||||
|
||||
- [x] `auth.ts` — Implementar `random` (Fisher-Yates shuffle)
|
||||
- [x] `auth.ts` — Implementar `least-used` (sorted by lastUsedAt)
|
||||
- [x] `auth.ts` — Implementar `cost-optimized` (sorted by priority)
|
||||
- [x] `auth.ts` — Corrigir `p2c` (power-of-two-choices com health scoring)
|
||||
- [x] `settings.ts` — Expandir tipo `fallbackStrategy` para 6 valores
|
||||
|
||||
---
|
||||
|
||||
## Verificação Final
|
||||
|
||||
- [x] Rodar todos os testes unitários: `node --test tests/unit/*.test.mjs`
|
||||
- [x] Build do Next.js: `npm run build`
|
||||
- [x] Verificar aba Resilience no browser
|
||||
- [x] Testar persistência dos perfis (salvar → reload)
|
||||
- [x] Testar Reset All Breakers
|
||||
- [x] Verificar todas as 5 tabs reestruturadas
|
||||
+42
-3
@@ -1,5 +1,7 @@
|
||||
# Troubleshooting
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](TROUBLESHOOTING.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/TROUBLESHOOTING.md) | 🇪🇸 [Español](i18n/es/TROUBLESHOOTING.md) | 🇫🇷 [Français](i18n/fr/TROUBLESHOOTING.md) | 🇮🇹 [Italiano](i18n/it/TROUBLESHOOTING.md) | 🇷🇺 [Русский](i18n/ru/TROUBLESHOOTING.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/TROUBLESHOOTING.md) | 🇩🇪 [Deutsch](i18n/de/TROUBLESHOOTING.md) | 🇮🇳 [हिन्दी](i18n/in/TROUBLESHOOTING.md) | 🇹🇭 [ไทย](i18n/th/TROUBLESHOOTING.md) | 🇺🇦 [Українська](i18n/uk-UA/TROUBLESHOOTING.md) | 🇸🇦 [العربية](i18n/ar/TROUBLESHOOTING.md) | 🇯🇵 [日本語](i18n/ja/TROUBLESHOOTING.md) | 🇻🇳 [Tiếng Việt](i18n/vi/TROUBLESHOOTING.md) | 🇧🇬 [Български](i18n/bg/TROUBLESHOOTING.md) | 🇩🇰 [Dansk](i18n/da/TROUBLESHOOTING.md) | 🇫🇮 [Suomi](i18n/fi/TROUBLESHOOTING.md) | 🇮🇱 [עברית](i18n/he/TROUBLESHOOTING.md) | 🇭🇺 [Magyar](i18n/hu/TROUBLESHOOTING.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/TROUBLESHOOTING.md) | 🇰🇷 [한국어](i18n/ko/TROUBLESHOOTING.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/TROUBLESHOOTING.md) | 🇳🇱 [Nederlands](i18n/nl/TROUBLESHOOTING.md) | 🇳🇴 [Norsk](i18n/no/TROUBLESHOOTING.md) | 🇵🇹 [Português (Portugal)](i18n/pt/TROUBLESHOOTING.md) | 🇷🇴 [Română](i18n/ro/TROUBLESHOOTING.md) | 🇵🇱 [Polski](i18n/pl/TROUBLESHOOTING.md) | 🇸🇰 [Slovenčina](i18n/sk/TROUBLESHOOTING.md) | 🇸🇪 [Svenska](i18n/sv/TROUBLESHOOTING.md) | 🇵🇭 [Filipino](i18n/phi/TROUBLESHOOTING.md)
|
||||
|
||||
Common problems and solutions for OmniRoute.
|
||||
|
||||
---
|
||||
@@ -8,9 +10,11 @@ Common problems and solutions for OmniRoute.
|
||||
|
||||
| Problem | Solution |
|
||||
| ----------------------------- | ------------------------------------------------------------------ |
|
||||
| First login not working | Check `INITIAL_PASSWORD` in `.env` (default: `123456`) |
|
||||
| First login not working | Set `INITIAL_PASSWORD` in `.env` (no hardcoded default) |
|
||||
| Dashboard opens on wrong port | Set `PORT=20128` and `NEXT_PUBLIC_BASE_URL=http://localhost:20128` |
|
||||
| No request logs under `logs/` | Set `ENABLE_REQUEST_LOGS=true` |
|
||||
| EACCES: permission denied | Set `DATA_DIR=/path/to/writable/dir` to override `~/.omniroute` |
|
||||
| Routing strategy not saving | Update to v1.4.11+ (Zod schema fix for settings persistence) |
|
||||
|
||||
---
|
||||
|
||||
@@ -116,8 +120,8 @@ curl http://localhost:20128/api/monitoring/health
|
||||
|
||||
### Runtime Storage
|
||||
|
||||
- Main state: `${DATA_DIR}/db.json` (providers, combos, aliases, keys, settings)
|
||||
- Usage: `${DATA_DIR}/usage.json`, `${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`
|
||||
- Main state: `${DATA_DIR}/storage.sqlite` (providers, combos, aliases, keys, settings)
|
||||
- Usage: SQLite tables in `storage.sqlite` (`usage_history`, `call_logs`, `proxy_logs`) + optional `${DATA_DIR}/log.txt` and `${DATA_DIR}/call_logs/`
|
||||
- Request logs: `<repo>/logs/...` (when `ENABLE_REQUEST_LOGS=true`)
|
||||
|
||||
---
|
||||
@@ -206,6 +210,41 @@ When many concurrent requests hit a rate-limited provider, OmniRoute uses mutex
|
||||
|
||||
---
|
||||
|
||||
## Optional RAG / LLM failure taxonomy (16 problems)
|
||||
|
||||
Some OmniRoute users place the gateway in front of RAG or agent stacks. In those setups it is common to see a strange pattern: OmniRoute looks healthy (providers up, routing profiles ok, no rate limit alerts) but the final answer is still wrong.
|
||||
|
||||
In practice these incidents usually come from the downstream RAG pipeline, not from the gateway itself.
|
||||
|
||||
If you want a shared vocabulary to describe those failures you can use the WFGY ProblemMap, an external MIT license text resource that defines sixteen recurring RAG / LLM failure patterns. At a high level it covers:
|
||||
|
||||
- retrieval drift and broken context boundaries
|
||||
- empty or stale indexes and vector stores
|
||||
- embedding versus semantic mismatch
|
||||
- prompt assembly and context window issues
|
||||
- logic collapse and overconfident answers
|
||||
- long chain and agent coordination failures
|
||||
- multi agent memory and role drift
|
||||
- deployment and bootstrap ordering problems
|
||||
|
||||
The idea is simple:
|
||||
|
||||
1. When you investigate a bad response, capture:
|
||||
- user task and request
|
||||
- route or provider combo in OmniRoute
|
||||
- any RAG context used downstream (retrieved documents, tool calls, etc)
|
||||
2. Map the incident to one or two WFGY ProblemMap numbers (`No.1` … `No.16`).
|
||||
3. Store the number in your own dashboard, runbook, or incident tracker next to the OmniRoute logs.
|
||||
4. Use the corresponding WFGY page to decide whether you need to change your RAG stack, retriever, or routing strategy.
|
||||
|
||||
Full text and concrete recipes live here (MIT license, text only):
|
||||
|
||||
[WFGY ProblemMap README](https://github.com/onestardao/WFGY/blob/main/ProblemMap/README.md)
|
||||
|
||||
You can ignore this section if you do not run RAG or agent pipelines behind OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Still Stuck?
|
||||
|
||||
- **GitHub Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
|
||||
+127
-14
@@ -1,5 +1,7 @@
|
||||
# User Guide
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](USER_GUIDE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/USER_GUIDE.md) | 🇪🇸 [Español](i18n/es/USER_GUIDE.md) | 🇫🇷 [Français](i18n/fr/USER_GUIDE.md) | 🇮🇹 [Italiano](i18n/it/USER_GUIDE.md) | 🇷🇺 [Русский](i18n/ru/USER_GUIDE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/USER_GUIDE.md) | 🇩🇪 [Deutsch](i18n/de/USER_GUIDE.md) | 🇮🇳 [हिन्दी](i18n/in/USER_GUIDE.md) | 🇹🇭 [ไทย](i18n/th/USER_GUIDE.md) | 🇺🇦 [Українська](i18n/uk-UA/USER_GUIDE.md) | 🇸🇦 [العربية](i18n/ar/USER_GUIDE.md) | 🇯🇵 [日本語](i18n/ja/USER_GUIDE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/USER_GUIDE.md) | 🇧🇬 [Български](i18n/bg/USER_GUIDE.md) | 🇩🇰 [Dansk](i18n/da/USER_GUIDE.md) | 🇫🇮 [Suomi](i18n/fi/USER_GUIDE.md) | 🇮🇱 [עברית](i18n/he/USER_GUIDE.md) | 🇭🇺 [Magyar](i18n/hu/USER_GUIDE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/USER_GUIDE.md) | 🇰🇷 [한국어](i18n/ko/USER_GUIDE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/USER_GUIDE.md) | 🇳🇱 [Nederlands](i18n/nl/USER_GUIDE.md) | 🇳🇴 [Norsk](i18n/no/USER_GUIDE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/USER_GUIDE.md) | 🇷🇴 [Română](i18n/ro/USER_GUIDE.md) | 🇵🇱 [Polski](i18n/pl/USER_GUIDE.md) | 🇸🇰 [Slovenčina](i18n/sk/USER_GUIDE.md) | 🇸🇪 [Svenska](i18n/sv/USER_GUIDE.md) | 🇵🇭 [Filipino](i18n/phi/USER_GUIDE.md)
|
||||
|
||||
Complete guide for configuring providers, creating combos, integrating CLI tools, and deploying OmniRoute.
|
||||
|
||||
---
|
||||
@@ -316,6 +318,25 @@ Model: cc/claude-opus-4-6
|
||||
|
||||
## 🚀 Deployment
|
||||
|
||||
### Global npm install (Recommended)
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
|
||||
# Create config directory
|
||||
mkdir -p ~/.omniroute
|
||||
|
||||
# Create .env file (see .env.example)
|
||||
cp .env.example ~/.omniroute/.env
|
||||
|
||||
# Start server
|
||||
omniroute
|
||||
# Or with custom port:
|
||||
omniroute --port 3000
|
||||
```
|
||||
|
||||
The CLI automatically loads `.env` from `~/.omniroute/.env` or `./.env`.
|
||||
|
||||
### VPS Deployment
|
||||
|
||||
```bash
|
||||
@@ -335,6 +356,43 @@ npm run start
|
||||
# Or: pm2 start npm --name omniroute -- start
|
||||
```
|
||||
|
||||
### PM2 Deployment (Low Memory)
|
||||
|
||||
For servers with limited RAM, use the memory limit option:
|
||||
|
||||
```bash
|
||||
# With 512MB limit (default)
|
||||
pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or with custom memory limit
|
||||
OMNIROUTE_MEMORY_MB=512 pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or using ecosystem.config.js
|
||||
pm2 start ecosystem.config.js
|
||||
```
|
||||
|
||||
Create `ecosystem.config.js`:
|
||||
|
||||
```javascript
|
||||
module.exports = {
|
||||
apps: [
|
||||
{
|
||||
name: "omniroute",
|
||||
script: "npm",
|
||||
args: "start",
|
||||
env: {
|
||||
NODE_ENV: "production",
|
||||
OMNIROUTE_MEMORY_MB: "512",
|
||||
JWT_SECRET: "your-secret",
|
||||
INITIAL_PASSWORD: "your-password",
|
||||
},
|
||||
node_args: "--max-old-space-size=512",
|
||||
max_memory_restart: "300M",
|
||||
},
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
@@ -349,20 +407,23 @@ For host-integrated mode with CLI binaries, see the Docker section in the main d
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| --------------------- | ------------------------------------ | ------------------------------------------------------- |
|
||||
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
|
||||
| `INITIAL_PASSWORD` | `123456` | First login password |
|
||||
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
|
||||
| `PORT` | framework default | Service port (`20128` in examples) |
|
||||
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
|
||||
| `NODE_ENV` | runtime default | Set `production` for deploy |
|
||||
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
|
||||
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
|
||||
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
|
||||
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
|
||||
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
|
||||
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
|
||||
| Variable | Default | Description |
|
||||
| ------------------------- | ------------------------------------ | ------------------------------------------------------- |
|
||||
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
|
||||
| `INITIAL_PASSWORD` | `123456` | First login password |
|
||||
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
|
||||
| `PORT` | framework default | Service port (`20128` in examples) |
|
||||
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
|
||||
| `NODE_ENV` | runtime default | Set `production` for deploy |
|
||||
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
|
||||
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
|
||||
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
|
||||
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
|
||||
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
|
||||
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit in MB |
|
||||
| `PROMPT_CACHE_MAX_SIZE` | `50` | Max prompt cache entries |
|
||||
| `SEMANTIC_CACHE_MAX_SIZE` | `100` | Max semantic cache entries |
|
||||
|
||||
For the full environment variable reference, see the [README](../README.md).
|
||||
|
||||
@@ -694,3 +755,55 @@ Access via **Dashboard → Health**. Real-time system health overview with 6 car
|
||||
| **Latency Telemetry** | p50/p95/p99 latency aggregation per provider |
|
||||
|
||||
**Pro Tip:** The Health page auto-refreshes every 10 seconds. Use the circuit breaker card to identify which providers are experiencing issues.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application (Electron)
|
||||
|
||||
OmniRoute is available as a native desktop application for Windows, macOS, and Linux.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# From the electron directory:
|
||||
cd electron
|
||||
npm install
|
||||
|
||||
# Development mode (connect to running Next.js dev server):
|
||||
npm run dev
|
||||
|
||||
# Production mode (uses standalone build):
|
||||
npm start
|
||||
```
|
||||
|
||||
### Building Installers
|
||||
|
||||
```bash
|
||||
cd electron
|
||||
npm run build # Current platform
|
||||
npm run build:win # Windows (.exe NSIS)
|
||||
npm run build:mac # macOS (.dmg universal)
|
||||
npm run build:linux # Linux (.AppImage)
|
||||
```
|
||||
|
||||
Output → `electron/dist-electron/`
|
||||
|
||||
### Key Features
|
||||
|
||||
| Feature | Description |
|
||||
| --------------------------- | ---------------------------------------------------- |
|
||||
| **Server Readiness** | Polls server before showing window (no blank screen) |
|
||||
| **System Tray** | Minimize to tray, change port, quit from tray menu |
|
||||
| **Port Management** | Change server port from tray (auto-restarts server) |
|
||||
| **Content Security Policy** | Restrictive CSP via session headers |
|
||||
| **Single Instance** | Only one app instance can run at a time |
|
||||
| **Offline Mode** | Bundled Next.js server works without internet |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| --------------------- | ------- | -------------------------------- |
|
||||
| `OMNIROUTE_PORT` | `20128` | Server port |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit (64–16384 MB) |
|
||||
|
||||
📖 Full documentation: [`electron/README.md`](../electron/README.md)
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
# ADR-001: Next.js as the Foundation for an AI Gateway
|
||||
|
||||
## Status: Accepted
|
||||
|
||||
## Context
|
||||
|
||||
OmniRoute is an AI routing gateway that translates, forwards, and manages requests across 20+ LLM providers. We needed a framework that could serve both the API proxy layer and a management dashboard from a single codebase.
|
||||
|
||||
**Alternatives considered:**
|
||||
|
||||
- **Express.js only** — Simpler proxy, but requires separate frontend tooling
|
||||
- **Fastify** — Fast, but no built-in SSR/dashboard support
|
||||
- **Next.js** — Unified full-stack framework with API routes, SSR, and static pages
|
||||
|
||||
## Decision
|
||||
|
||||
We chose Next.js because:
|
||||
|
||||
1. **Single deployment** — API routes (`/api/*`) and dashboard UI in one process
|
||||
2. **Middleware layer** — Native request interception for auth guards and request tracing
|
||||
3. **File-based routing** — Easy to map provider endpoints to handlers
|
||||
4. **Built-in TypeScript** — Type safety across the entire codebase
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
|
||||
- One `npm run build` produces both API and UI
|
||||
- Middleware provides centralized auth and request tracing
|
||||
- Dashboard gets automatic code splitting and optimization
|
||||
|
||||
**Negative:**
|
||||
|
||||
- Next.js middleware has limitations (no heavy imports, edge runtime constraints)
|
||||
- Serverless deployment model doesn't align with persistent WebSocket/SSE connections
|
||||
- Build times are longer than Express-only setups
|
||||
- The SSE proxy layer (`open-sse/`) operates outside Next.js conventions
|
||||
@@ -1,37 +0,0 @@
|
||||
# ADR-002: Hub-and-Spoke Translation with OpenAI as Intermediate Format
|
||||
|
||||
## Status: Accepted
|
||||
|
||||
## Context
|
||||
|
||||
OmniRoute routes requests across 20+ providers, each with its own API format (OpenAI, Anthropic Messages, Google Gemini, AWS Bedrock, etc.). Direct provider-to-provider translation would require O(n²) translators.
|
||||
|
||||
**Alternatives considered:**
|
||||
|
||||
- **Direct translation** — Each pair needs a dedicated translator (n² complexity)
|
||||
- **Common intermediate format** — Translate to/from a canonical format (2n complexity)
|
||||
- **Protocol buffers** — Strong typing but heavy overhead for a proxy
|
||||
|
||||
## Decision
|
||||
|
||||
We use the **OpenAI Chat Completions format** as the canonical intermediate representation. All incoming requests are normalized to OpenAI format, processed, then translated to the target provider's format.
|
||||
|
||||
```
|
||||
Client → [any format] → OpenAI canonical → [target format] → Provider
|
||||
Provider → [response] → OpenAI canonical → [original format] → Client
|
||||
```
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
|
||||
- Only 2 translators per provider (inbound + outbound) instead of n² pairs
|
||||
- OpenAI format is the de facto standard — most clients already use it
|
||||
- Adding a new provider requires only implementing one translator pair
|
||||
- Streaming (SSE) works consistently through the canonical format
|
||||
|
||||
**Negative:**
|
||||
|
||||
- Some provider-specific features may be lost in translation
|
||||
- The double translation adds latency (typically < 5ms)
|
||||
- OpenAI format changes require updating the canonical representation
|
||||
@@ -1,39 +0,0 @@
|
||||
# ADR-003: Dual Storage — SQLite Primary with JSON Migration Path
|
||||
|
||||
## Status: Accepted
|
||||
|
||||
## Context
|
||||
|
||||
OmniRoute originally used LowDB (JSON file) for all persistence. As the project grew, JSON-based storage became a bottleneck for concurrent access, querying, and data integrity.
|
||||
|
||||
**Alternatives considered:**
|
||||
|
||||
- **LowDB only** — Simple but no concurrent access, no ACID, no querying
|
||||
- **SQLite only** — Fast, ACID-compliant, but breaks existing deployments
|
||||
- **PostgreSQL** — Production-grade but requires external dependency
|
||||
- **Dual storage with migration** — SQLite primary + automatic JSON migration
|
||||
|
||||
## Decision
|
||||
|
||||
We migrated to **SQLite as the primary store** with an automatic one-time migration from `db.json`:
|
||||
|
||||
1. On startup, if `db.json` exists and SQLite is empty, auto-migrate all data
|
||||
2. All new reads/writes go through SQLite
|
||||
3. The `db.json` file is preserved but no longer written to
|
||||
|
||||
Settings remain in a hybrid model where LowDB handles simple key-value configuration for backward compatibility.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
|
||||
- ACID transactions for provider connections, API keys, and usage data
|
||||
- Proper SQL queries for analytics and log filtering
|
||||
- Concurrent read/write safety via WAL mode
|
||||
- Zero-downtime migration from JSON — users upgrade transparently
|
||||
|
||||
**Negative:**
|
||||
|
||||
- Two storage engines to maintain (SQLite + LowDB for settings)
|
||||
- Migration code must handle edge cases and partial data
|
||||
- SQLite binary dependency needed in deployment environments
|
||||
@@ -0,0 +1,12 @@
|
||||
# Multilingual Documentation
|
||||
|
||||
This directory contains machine-assisted translations based on the English docs.
|
||||
|
||||
- **API_REFERENCE.md**: 🇺🇸 [English](../API_REFERENCE.md) | 🇧🇷 [Português (Brasil)](./pt-BR/API_REFERENCE.md) | 🇪🇸 [Español](./es/API_REFERENCE.md) | 🇫🇷 [Français](./fr/API_REFERENCE.md) | 🇮🇹 [Italiano](./it/API_REFERENCE.md) | 🇷🇺 [Русский](./ru/API_REFERENCE.md) | 🇨🇳 [中文 (简体)](./zh-CN/API_REFERENCE.md) | 🇩🇪 [Deutsch](./de/API_REFERENCE.md) | 🇮🇳 [हिन्दी](./in/API_REFERENCE.md) | 🇹🇭 [ไทย](./th/API_REFERENCE.md) | 🇺🇦 [Українська](./uk-UA/API_REFERENCE.md) | 🇸🇦 [العربية](./ar/API_REFERENCE.md) | 🇯🇵 [日本語](./ja/API_REFERENCE.md) | 🇻🇳 [Tiếng Việt](./vi/API_REFERENCE.md) | 🇧🇬 [Български](./bg/API_REFERENCE.md) | 🇩🇰 [Dansk](./da/API_REFERENCE.md) | 🇫🇮 [Suomi](./fi/API_REFERENCE.md) | 🇮🇱 [עברית](./he/API_REFERENCE.md) | 🇭🇺 [Magyar](./hu/API_REFERENCE.md) | 🇮🇩 [Bahasa Indonesia](./id/API_REFERENCE.md) | 🇰🇷 [한국어](./ko/API_REFERENCE.md) | 🇲🇾 [Bahasa Melayu](./ms/API_REFERENCE.md) | 🇳🇱 [Nederlands](./nl/API_REFERENCE.md) | 🇳🇴 [Norsk](./no/API_REFERENCE.md) | 🇵🇹 [Português (Portugal)](./pt/API_REFERENCE.md) | 🇷🇴 [Română](./ro/API_REFERENCE.md) | 🇵🇱 [Polski](./pl/API_REFERENCE.md) | 🇸🇰 [Slovenčina](./sk/API_REFERENCE.md) | 🇸🇪 [Svenska](./sv/API_REFERENCE.md) | 🇵🇭 [Filipino](./phi/API_REFERENCE.md)
|
||||
- **ARCHITECTURE.md**: 🇺🇸 [English](../ARCHITECTURE.md) | 🇧🇷 [Português (Brasil)](./pt-BR/ARCHITECTURE.md) | 🇪🇸 [Español](./es/ARCHITECTURE.md) | 🇫🇷 [Français](./fr/ARCHITECTURE.md) | 🇮🇹 [Italiano](./it/ARCHITECTURE.md) | 🇷🇺 [Русский](./ru/ARCHITECTURE.md) | 🇨🇳 [中文 (简体)](./zh-CN/ARCHITECTURE.md) | 🇩🇪 [Deutsch](./de/ARCHITECTURE.md) | 🇮🇳 [हिन्दी](./in/ARCHITECTURE.md) | 🇹🇭 [ไทย](./th/ARCHITECTURE.md) | 🇺🇦 [Українська](./uk-UA/ARCHITECTURE.md) | 🇸🇦 [العربية](./ar/ARCHITECTURE.md) | 🇯🇵 [日本語](./ja/ARCHITECTURE.md) | 🇻🇳 [Tiếng Việt](./vi/ARCHITECTURE.md) | 🇧🇬 [Български](./bg/ARCHITECTURE.md) | 🇩🇰 [Dansk](./da/ARCHITECTURE.md) | 🇫🇮 [Suomi](./fi/ARCHITECTURE.md) | 🇮🇱 [עברית](./he/ARCHITECTURE.md) | 🇭🇺 [Magyar](./hu/ARCHITECTURE.md) | 🇮🇩 [Bahasa Indonesia](./id/ARCHITECTURE.md) | 🇰🇷 [한국어](./ko/ARCHITECTURE.md) | 🇲🇾 [Bahasa Melayu](./ms/ARCHITECTURE.md) | 🇳🇱 [Nederlands](./nl/ARCHITECTURE.md) | 🇳🇴 [Norsk](./no/ARCHITECTURE.md) | 🇵🇹 [Português (Portugal)](./pt/ARCHITECTURE.md) | 🇷🇴 [Română](./ro/ARCHITECTURE.md) | 🇵🇱 [Polski](./pl/ARCHITECTURE.md) | 🇸🇰 [Slovenčina](./sk/ARCHITECTURE.md) | 🇸🇪 [Svenska](./sv/ARCHITECTURE.md) | 🇵🇭 [Filipino](./phi/ARCHITECTURE.md)
|
||||
- **CODEBASE_DOCUMENTATION.md**: 🇺🇸 [English](../CODEBASE_DOCUMENTATION.md) | 🇧🇷 [Português (Brasil)](./pt-BR/CODEBASE_DOCUMENTATION.md) | 🇪🇸 [Español](./es/CODEBASE_DOCUMENTATION.md) | 🇫🇷 [Français](./fr/CODEBASE_DOCUMENTATION.md) | 🇮🇹 [Italiano](./it/CODEBASE_DOCUMENTATION.md) | 🇷🇺 [Русский](./ru/CODEBASE_DOCUMENTATION.md) | 🇨🇳 [中文 (简体)](./zh-CN/CODEBASE_DOCUMENTATION.md) | 🇩🇪 [Deutsch](./de/CODEBASE_DOCUMENTATION.md) | 🇮🇳 [हिन्दी](./in/CODEBASE_DOCUMENTATION.md) | 🇹🇭 [ไทย](./th/CODEBASE_DOCUMENTATION.md) | 🇺🇦 [Українська](./uk-UA/CODEBASE_DOCUMENTATION.md) | 🇸🇦 [العربية](./ar/CODEBASE_DOCUMENTATION.md) | 🇯🇵 [日本語](./ja/CODEBASE_DOCUMENTATION.md) | 🇻🇳 [Tiếng Việt](./vi/CODEBASE_DOCUMENTATION.md) | 🇧🇬 [Български](./bg/CODEBASE_DOCUMENTATION.md) | 🇩🇰 [Dansk](./da/CODEBASE_DOCUMENTATION.md) | 🇫🇮 [Suomi](./fi/CODEBASE_DOCUMENTATION.md) | 🇮🇱 [עברית](./he/CODEBASE_DOCUMENTATION.md) | 🇭🇺 [Magyar](./hu/CODEBASE_DOCUMENTATION.md) | 🇮🇩 [Bahasa Indonesia](./id/CODEBASE_DOCUMENTATION.md) | 🇰🇷 [한국어](./ko/CODEBASE_DOCUMENTATION.md) | 🇲🇾 [Bahasa Melayu](./ms/CODEBASE_DOCUMENTATION.md) | 🇳🇱 [Nederlands](./nl/CODEBASE_DOCUMENTATION.md) | 🇳🇴 [Norsk](./no/CODEBASE_DOCUMENTATION.md) | 🇵🇹 [Português (Portugal)](./pt/CODEBASE_DOCUMENTATION.md) | 🇷🇴 [Română](./ro/CODEBASE_DOCUMENTATION.md) | 🇵🇱 [Polski](./pl/CODEBASE_DOCUMENTATION.md) | 🇸🇰 [Slovenčina](./sk/CODEBASE_DOCUMENTATION.md) | 🇸🇪 [Svenska](./sv/CODEBASE_DOCUMENTATION.md) | 🇵🇭 [Filipino](./phi/CODEBASE_DOCUMENTATION.md)
|
||||
- **FEATURES.md**: 🇺🇸 [English](../FEATURES.md) | 🇧🇷 [Português (Brasil)](./pt-BR/FEATURES.md) | 🇪🇸 [Español](./es/FEATURES.md) | 🇫🇷 [Français](./fr/FEATURES.md) | 🇮🇹 [Italiano](./it/FEATURES.md) | 🇷🇺 [Русский](./ru/FEATURES.md) | 🇨🇳 [中文 (简体)](./zh-CN/FEATURES.md) | 🇩🇪 [Deutsch](./de/FEATURES.md) | 🇮🇳 [हिन्दी](./in/FEATURES.md) | 🇹🇭 [ไทย](./th/FEATURES.md) | 🇺🇦 [Українська](./uk-UA/FEATURES.md) | 🇸🇦 [العربية](./ar/FEATURES.md) | 🇯🇵 [日本語](./ja/FEATURES.md) | 🇻🇳 [Tiếng Việt](./vi/FEATURES.md) | 🇧🇬 [Български](./bg/FEATURES.md) | 🇩🇰 [Dansk](./da/FEATURES.md) | 🇫🇮 [Suomi](./fi/FEATURES.md) | 🇮🇱 [עברית](./he/FEATURES.md) | 🇭🇺 [Magyar](./hu/FEATURES.md) | 🇮🇩 [Bahasa Indonesia](./id/FEATURES.md) | 🇰🇷 [한국어](./ko/FEATURES.md) | 🇲🇾 [Bahasa Melayu](./ms/FEATURES.md) | 🇳🇱 [Nederlands](./nl/FEATURES.md) | 🇳🇴 [Norsk](./no/FEATURES.md) | 🇵🇹 [Português (Portugal)](./pt/FEATURES.md) | 🇷🇴 [Română](./ro/FEATURES.md) | 🇵🇱 [Polski](./pl/FEATURES.md) | 🇸🇰 [Slovenčina](./sk/FEATURES.md) | 🇸🇪 [Svenska](./sv/FEATURES.md) | 🇵🇭 [Filipino](./phi/FEATURES.md)
|
||||
- **TROUBLESHOOTING.md**: 🇺🇸 [English](../TROUBLESHOOTING.md) | 🇧🇷 [Português (Brasil)](./pt-BR/TROUBLESHOOTING.md) | 🇪🇸 [Español](./es/TROUBLESHOOTING.md) | 🇫🇷 [Français](./fr/TROUBLESHOOTING.md) | 🇮🇹 [Italiano](./it/TROUBLESHOOTING.md) | 🇷🇺 [Русский](./ru/TROUBLESHOOTING.md) | 🇨🇳 [中文 (简体)](./zh-CN/TROUBLESHOOTING.md) | 🇩🇪 [Deutsch](./de/TROUBLESHOOTING.md) | 🇮🇳 [हिन्दी](./in/TROUBLESHOOTING.md) | 🇹🇭 [ไทย](./th/TROUBLESHOOTING.md) | 🇺🇦 [Українська](./uk-UA/TROUBLESHOOTING.md) | 🇸🇦 [العربية](./ar/TROUBLESHOOTING.md) | 🇯🇵 [日本語](./ja/TROUBLESHOOTING.md) | 🇻🇳 [Tiếng Việt](./vi/TROUBLESHOOTING.md) | 🇧🇬 [Български](./bg/TROUBLESHOOTING.md) | 🇩🇰 [Dansk](./da/TROUBLESHOOTING.md) | 🇫🇮 [Suomi](./fi/TROUBLESHOOTING.md) | 🇮🇱 [עברית](./he/TROUBLESHOOTING.md) | 🇭🇺 [Magyar](./hu/TROUBLESHOOTING.md) | 🇮🇩 [Bahasa Indonesia](./id/TROUBLESHOOTING.md) | 🇰🇷 [한국어](./ko/TROUBLESHOOTING.md) | 🇲🇾 [Bahasa Melayu](./ms/TROUBLESHOOTING.md) | 🇳🇱 [Nederlands](./nl/TROUBLESHOOTING.md) | 🇳🇴 [Norsk](./no/TROUBLESHOOTING.md) | 🇵🇹 [Português (Portugal)](./pt/TROUBLESHOOTING.md) | 🇷🇴 [Română](./ro/TROUBLESHOOTING.md) | 🇵🇱 [Polski](./pl/TROUBLESHOOTING.md) | 🇸🇰 [Slovenčina](./sk/TROUBLESHOOTING.md) | 🇸🇪 [Svenska](./sv/TROUBLESHOOTING.md) | 🇵🇭 [Filipino](./phi/TROUBLESHOOTING.md)
|
||||
- **USER_GUIDE.md**: 🇺🇸 [English](../USER_GUIDE.md) | 🇧🇷 [Português (Brasil)](./pt-BR/USER_GUIDE.md) | 🇪🇸 [Español](./es/USER_GUIDE.md) | 🇫🇷 [Français](./fr/USER_GUIDE.md) | 🇮🇹 [Italiano](./it/USER_GUIDE.md) | 🇷🇺 [Русский](./ru/USER_GUIDE.md) | 🇨🇳 [中文 (简体)](./zh-CN/USER_GUIDE.md) | 🇩🇪 [Deutsch](./de/USER_GUIDE.md) | 🇮🇳 [हिन्दी](./in/USER_GUIDE.md) | 🇹🇭 [ไทย](./th/USER_GUIDE.md) | 🇺🇦 [Українська](./uk-UA/USER_GUIDE.md) | 🇸🇦 [العربية](./ar/USER_GUIDE.md) | 🇯🇵 [日本語](./ja/USER_GUIDE.md) | 🇻🇳 [Tiếng Việt](./vi/USER_GUIDE.md) | 🇧🇬 [Български](./bg/USER_GUIDE.md) | 🇩🇰 [Dansk](./da/USER_GUIDE.md) | 🇫🇮 [Suomi](./fi/USER_GUIDE.md) | 🇮🇱 [עברית](./he/USER_GUIDE.md) | 🇭🇺 [Magyar](./hu/USER_GUIDE.md) | 🇮🇩 [Bahasa Indonesia](./id/USER_GUIDE.md) | 🇰🇷 [한국어](./ko/USER_GUIDE.md) | 🇲🇾 [Bahasa Melayu](./ms/USER_GUIDE.md) | 🇳🇱 [Nederlands](./nl/USER_GUIDE.md) | 🇳🇴 [Norsk](./no/USER_GUIDE.md) | 🇵🇹 [Português (Portugal)](./pt/USER_GUIDE.md) | 🇷🇴 [Română](./ro/USER_GUIDE.md) | 🇵🇱 [Polski](./pl/USER_GUIDE.md) | 🇸🇰 [Slovenčina](./sk/USER_GUIDE.md) | 🇸🇪 [Svenska](./sv/USER_GUIDE.md) | 🇵🇭 [Filipino](./phi/USER_GUIDE.md)
|
||||
|
||||
Generated on 2026-02-26.
|
||||
@@ -0,0 +1,200 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/A2A-SERVER.md) · 🇪🇸 [es](../es/A2A-SERVER.md) · 🇫🇷 [fr](../fr/A2A-SERVER.md) · 🇩🇪 [de](../de/A2A-SERVER.md) · 🇮🇹 [it](../it/A2A-SERVER.md) · 🇷🇺 [ru](../ru/A2A-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/A2A-SERVER.md) · 🇯🇵 [ja](../ja/A2A-SERVER.md) · 🇰🇷 [ko](../ko/A2A-SERVER.md) · 🇸🇦 [ar](../ar/A2A-SERVER.md) · 🇮🇳 [in](../in/A2A-SERVER.md) · 🇹🇭 [th](../th/A2A-SERVER.md) · 🇻🇳 [vi](../vi/A2A-SERVER.md) · 🇮🇩 [id](../id/A2A-SERVER.md) · 🇲🇾 [ms](../ms/A2A-SERVER.md) · 🇳🇱 [nl](../nl/A2A-SERVER.md) · 🇵🇱 [pl](../pl/A2A-SERVER.md) · 🇸🇪 [sv](../sv/A2A-SERVER.md) · 🇳🇴 [no](../no/A2A-SERVER.md) · 🇩🇰 [da](../da/A2A-SERVER.md) · 🇫🇮 [fi](../fi/A2A-SERVER.md) · 🇵🇹 [pt](../pt/A2A-SERVER.md) · 🇷🇴 [ro](../ro/A2A-SERVER.md) · 🇭🇺 [hu](../hu/A2A-SERVER.md) · 🇧🇬 [bg](../bg/A2A-SERVER.md) · 🇸🇰 [sk](../sk/A2A-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/A2A-SERVER.md) · 🇮🇱 [he](../he/A2A-SERVER.md) · 🇵🇭 [phi](../phi/A2A-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute A2A Server Documentation
|
||||
|
||||
> Agent-to-Agent Protocol v0.3 — OmniRoute as an intelligent routing agent
|
||||
|
||||
## Agent Discovery
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/.well-known/agent.json
|
||||
```
|
||||
|
||||
Returns the Agent Card describing OmniRoute's capabilities, skills, and authentication requirements.
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
All `/a2a` requests require an API key via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer YOUR_OMNIROUTE_API_KEY
|
||||
```
|
||||
|
||||
If no API key is configured on the server, authentication is bypassed.
|
||||
|
||||
---
|
||||
|
||||
## JSON-RPC 2.0 Methods
|
||||
|
||||
### `message/send` — Synchronous Execution
|
||||
|
||||
Sends a message to a skill and waits for the complete response.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Write a hello world in Python"}],
|
||||
"metadata": {"model": "auto", "combo": "fast-coding"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"result": {
|
||||
"task": { "id": "uuid", "state": "completed" },
|
||||
"artifacts": [{ "type": "text", "content": "..." }],
|
||||
"metadata": {
|
||||
"routing_explanation": "Selected claude-sonnet via provider \"anthropic\" (latency: 1200ms, cost: $0.003)",
|
||||
"cost_envelope": { "estimated": 0.005, "actual": 0.003, "currency": "USD" },
|
||||
"resilience_trace": [
|
||||
{ "event": "primary_selected", "provider": "anthropic", "timestamp": "..." }
|
||||
],
|
||||
"policy_verdict": { "allowed": true, "reason": "within budget and quota limits" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `message/stream` — SSE Streaming
|
||||
|
||||
Same as `message/send` but returns Server-Sent Events for real-time streaming.
|
||||
|
||||
```bash
|
||||
curl -N -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/stream",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Explain quantum computing"}]
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**SSE Events:**
|
||||
|
||||
```
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"working"},"chunk":{"type":"text","content":"..."}}}
|
||||
|
||||
: heartbeat 2026-03-03T17:00:00Z
|
||||
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"completed"},"metadata":{...}}}
|
||||
```
|
||||
|
||||
### `tasks/get` — Query Task Status
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"tasks/get","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
### `tasks/cancel` — Cancel a Task
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"3","method":"tasks/cancel","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Skills
|
||||
|
||||
| Skill | Description |
|
||||
| :----------------- | :------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `smart-routing` | Routes prompts through OmniRoute's intelligent pipeline. Returns response with routing explanation, cost, and resilience trace. |
|
||||
| `quota-management` | Answers natural-language queries about provider quotas, suggests free combos, and provides quota rankings. |
|
||||
|
||||
---
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
```
|
||||
submitted → working → completed
|
||||
→ failed
|
||||
→ cancelled
|
||||
```
|
||||
|
||||
- Tasks expire after 5 minutes (configurable)
|
||||
- Terminal states: `completed`, `failed`, `cancelled`
|
||||
- Event log tracks every state transition
|
||||
|
||||
---
|
||||
|
||||
## Error Codes
|
||||
|
||||
| Code | Meaning |
|
||||
| :----- | :----------------------------- |
|
||||
| -32700 | Parse error (invalid JSON) |
|
||||
| -32600 | Invalid request / Unauthorized |
|
||||
| -32601 | Method or skill not found |
|
||||
| -32602 | Invalid params |
|
||||
| -32603 | Internal error |
|
||||
|
||||
---
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### Python (requests)
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
resp = requests.post("http://localhost:20128/a2a", json={
|
||||
"jsonrpc": "2.0", "id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}
|
||||
}, headers={"Authorization": "Bearer YOUR_KEY"})
|
||||
|
||||
result = resp.json()["result"]
|
||||
print(result["artifacts"][0]["content"])
|
||||
print(result["metadata"]["routing_explanation"])
|
||||
```
|
||||
|
||||
### TypeScript (fetch)
|
||||
|
||||
```typescript
|
||||
const resp = await fetch("http://localhost:20128/a2a", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: "Bearer YOUR_KEY",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: "1",
|
||||
method: "message/send",
|
||||
params: {
|
||||
skill: "smart-routing",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { result } = await resp.json();
|
||||
console.log(result.metadata.routing_explanation);
|
||||
```
|
||||
@@ -0,0 +1,455 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/API_REFERENCE.md) · 🇪🇸 [es](../es/API_REFERENCE.md) · 🇫🇷 [fr](../fr/API_REFERENCE.md) · 🇩🇪 [de](../de/API_REFERENCE.md) · 🇮🇹 [it](../it/API_REFERENCE.md) · 🇷🇺 [ru](../ru/API_REFERENCE.md) · 🇨🇳 [zh-CN](../zh-CN/API_REFERENCE.md) · 🇯🇵 [ja](../ja/API_REFERENCE.md) · 🇰🇷 [ko](../ko/API_REFERENCE.md) · 🇸🇦 [ar](../ar/API_REFERENCE.md) · 🇮🇳 [in](../in/API_REFERENCE.md) · 🇹🇭 [th](../th/API_REFERENCE.md) · 🇻🇳 [vi](../vi/API_REFERENCE.md) · 🇮🇩 [id](../id/API_REFERENCE.md) · 🇲🇾 [ms](../ms/API_REFERENCE.md) · 🇳🇱 [nl](../nl/API_REFERENCE.md) · 🇵🇱 [pl](../pl/API_REFERENCE.md) · 🇸🇪 [sv](../sv/API_REFERENCE.md) · 🇳🇴 [no](../no/API_REFERENCE.md) · 🇩🇰 [da](../da/API_REFERENCE.md) · 🇫🇮 [fi](../fi/API_REFERENCE.md) · 🇵🇹 [pt](../pt/API_REFERENCE.md) · 🇷🇴 [ro](../ro/API_REFERENCE.md) · 🇭🇺 [hu](../hu/API_REFERENCE.md) · 🇧🇬 [bg](../bg/API_REFERENCE.md) · 🇸🇰 [sk](../sk/API_REFERENCE.md) · 🇺🇦 [uk-UA](../uk-UA/API_REFERENCE.md) · 🇮🇱 [he](../he/API_REFERENCE.md) · 🇵🇭 [phi](../phi/API_REFERENCE.md)
|
||||
|
||||
---
|
||||
|
||||
# API Reference
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](API_REFERENCE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/API_REFERENCE.md) | 🇪🇸 [Español](i18n/es/API_REFERENCE.md) | 🇫🇷 [Français](i18n/fr/API_REFERENCE.md) | 🇮🇹 [Italiano](i18n/it/API_REFERENCE.md) | 🇷🇺 [Русский](i18n/ru/API_REFERENCE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/API_REFERENCE.md) | 🇩🇪 [Deutsch](i18n/de/API_REFERENCE.md) | 🇮🇳 [हिन्दी](i18n/in/API_REFERENCE.md) | 🇹🇭 [ไทย](i18n/th/API_REFERENCE.md) | 🇺🇦 [Українська](i18n/uk-UA/API_REFERENCE.md) | 🇸🇦 [العربية](i18n/ar/API_REFERENCE.md) | 🇯🇵 [日本語](i18n/ja/API_REFERENCE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/API_REFERENCE.md) | 🇧🇬 [Български](i18n/bg/API_REFERENCE.md) | 🇩🇰 [Dansk](i18n/da/API_REFERENCE.md) | 🇫🇮 [Suomi](i18n/fi/API_REFERENCE.md) | 🇮🇱 [עברית](i18n/he/API_REFERENCE.md) | 🇭🇺 [Magyar](i18n/hu/API_REFERENCE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/API_REFERENCE.md) | 🇰🇷 [한국어](i18n/ko/API_REFERENCE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/API_REFERENCE.md) | 🇳🇱 [Nederlands](i18n/nl/API_REFERENCE.md) | 🇳🇴 [Norsk](i18n/no/API_REFERENCE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/API_REFERENCE.md) | 🇷🇴 [Română](i18n/ro/API_REFERENCE.md) | 🇵🇱 [Polski](i18n/pl/API_REFERENCE.md) | 🇸🇰 [Slovenčina](i18n/sk/API_REFERENCE.md) | 🇸🇪 [Svenska](i18n/sv/API_REFERENCE.md) | 🇵🇭 [Filipino](i18n/phi/API_REFERENCE.md)
|
||||
|
||||
Complete reference for all OmniRoute API endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Chat Completions](#chat-completions)
|
||||
- [Embeddings](#embeddings)
|
||||
- [Image Generation](#image-generation)
|
||||
- [List Models](#list-models)
|
||||
- [Compatibility Endpoints](#compatibility-endpoints)
|
||||
- [Semantic Cache](#semantic-cache)
|
||||
- [Dashboard & Management](#dashboard--management)
|
||||
- [Request Processing](#request-processing)
|
||||
- [Authentication](#authentication)
|
||||
|
||||
---
|
||||
|
||||
## Chat Completions
|
||||
|
||||
```bash
|
||||
POST /v1/chat/completions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "cc/claude-opus-4-6",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Write a function to..."}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
```
|
||||
|
||||
### Custom Headers
|
||||
|
||||
| Header | Direction | Description |
|
||||
| ------------------------ | --------- | --------------------------------- |
|
||||
| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache |
|
||||
| `X-OmniRoute-Progress` | Request | Set to `true` for progress events |
|
||||
| `Idempotency-Key` | Request | Dedup key (5s window) |
|
||||
| `X-Request-Id` | Request | Alternative dedup key |
|
||||
| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) |
|
||||
| `X-OmniRoute-Idempotent` | Response | `true` if deduplicated |
|
||||
| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on |
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
```bash
|
||||
POST /v1/embeddings
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "nebius/Qwen/Qwen3-Embedding-8B",
|
||||
"input": "The food was delicious"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: Nebius, OpenAI, Mistral, Together AI, Fireworks, NVIDIA.
|
||||
|
||||
```bash
|
||||
# List all embedding models
|
||||
GET /v1/embeddings
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Image Generation
|
||||
|
||||
```bash
|
||||
POST /v1/images/generations
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "openai/dall-e-3",
|
||||
"prompt": "A beautiful sunset over mountains",
|
||||
"size": "1024x1024"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: OpenAI (DALL-E), xAI (Grok Image), Together AI (FLUX), Fireworks AI.
|
||||
|
||||
```bash
|
||||
# List all image models
|
||||
GET /v1/images/generations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## List Models
|
||||
|
||||
```bash
|
||||
GET /v1/models
|
||||
Authorization: Bearer your-api-key
|
||||
|
||||
→ Returns all chat, embedding, and image models + combos in OpenAI format
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Compatibility Endpoints
|
||||
|
||||
| Method | Path | Format |
|
||||
| ------ | --------------------------- | ---------------------- |
|
||||
| POST | `/v1/chat/completions` | OpenAI |
|
||||
| POST | `/v1/messages` | Anthropic |
|
||||
| POST | `/v1/responses` | OpenAI Responses |
|
||||
| POST | `/v1/embeddings` | OpenAI |
|
||||
| POST | `/v1/images/generations` | OpenAI |
|
||||
| GET | `/v1/models` | OpenAI |
|
||||
| POST | `/v1/messages/count_tokens` | Anthropic |
|
||||
| GET | `/v1beta/models` | Gemini |
|
||||
| POST | `/v1beta/models/{...path}` | Gemini generateContent |
|
||||
| POST | `/v1/api/chat` | Ollama |
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
```bash
|
||||
POST /v1/providers/{provider}/chat/completions
|
||||
POST /v1/providers/{provider}/embeddings
|
||||
POST /v1/providers/{provider}/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
---
|
||||
|
||||
## Semantic Cache
|
||||
|
||||
```bash
|
||||
# Get cache stats
|
||||
GET /api/cache
|
||||
|
||||
# Clear all caches
|
||||
DELETE /api/cache
|
||||
```
|
||||
|
||||
Response example:
|
||||
|
||||
```json
|
||||
{
|
||||
"semanticCache": {
|
||||
"memorySize": 42,
|
||||
"memoryMaxSize": 500,
|
||||
"dbSize": 128,
|
||||
"hitRate": 0.65
|
||||
},
|
||||
"idempotency": {
|
||||
"activeKeys": 3,
|
||||
"windowMs": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dashboard & Management
|
||||
|
||||
### Authentication
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------------- | ------- | --------------------- |
|
||||
| `/api/auth/login` | POST | Login |
|
||||
| `/api/auth/logout` | POST | Logout |
|
||||
| `/api/settings/require-login` | GET/PUT | Toggle login required |
|
||||
|
||||
### Provider Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------- | --------------- | ------------------------ |
|
||||
| `/api/providers` | GET/POST | List / create providers |
|
||||
| `/api/providers/[id]` | GET/PUT/DELETE | Manage a provider |
|
||||
| `/api/providers/[id]/test` | POST | Test provider connection |
|
||||
| `/api/providers/[id]/models` | GET | List provider models |
|
||||
| `/api/providers/validate` | POST | Validate provider config |
|
||||
| `/api/provider-nodes*` | Various | Provider node management |
|
||||
| `/api/provider-models` | GET/POST/DELETE | Custom models |
|
||||
|
||||
### OAuth Flows
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------------- | ------- | ----------------------- |
|
||||
| `/api/oauth/[provider]/[action]` | Various | Provider-specific OAuth |
|
||||
|
||||
### Routing & Config
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------- | -------- | ----------------------------- |
|
||||
| `/api/models/alias` | GET/POST | Model aliases |
|
||||
| `/api/models/catalog` | GET | All models by provider + type |
|
||||
| `/api/combos*` | Various | Combo management |
|
||||
| `/api/keys*` | Various | API key management |
|
||||
| `/api/pricing` | GET | Model pricing |
|
||||
|
||||
### Usage & Analytics
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | -------------------- |
|
||||
| `/api/usage/history` | GET | Usage history |
|
||||
| `/api/usage/logs` | GET | Usage logs |
|
||||
| `/api/usage/request-logs` | GET | Request-level logs |
|
||||
| `/api/usage/[connectionId]` | GET | Per-connection usage |
|
||||
|
||||
### Settings
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------------- | ------- | ---------------------- |
|
||||
| `/api/settings` | GET/PUT | General settings |
|
||||
| `/api/settings/proxy` | GET/PUT | Network proxy config |
|
||||
| `/api/settings/proxy/test` | POST | Test proxy connection |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt |
|
||||
|
||||
### Monitoring
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------ | ---------- | ----------------------- |
|
||||
| `/api/sessions` | GET | Active session tracking |
|
||||
| `/api/rate-limits` | GET | Per-account rate limits |
|
||||
| `/api/monitoring/health` | GET | Health check |
|
||||
| `/api/cache` | GET/DELETE | Cache stats / clear |
|
||||
|
||||
### Backup & Export/Import
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | --------------------------------------- |
|
||||
| `/api/db-backups` | GET | List available backups |
|
||||
| `/api/db-backups` | PUT | Create a manual backup |
|
||||
| `/api/db-backups` | POST | Restore from a specific backup |
|
||||
| `/api/db-backups/export` | GET | Download database as .sqlite file |
|
||||
| `/api/db-backups/import` | POST | Upload .sqlite file to replace database |
|
||||
| `/api/db-backups/exportAll` | GET | Download full backup as .tar.gz archive |
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------- | ------- | --------------------- |
|
||||
| `/api/sync/cloud` | Various | Cloud sync operations |
|
||||
| `/api/sync/initialize` | POST | Initialize sync |
|
||||
| `/api/cloud/*` | Various | Cloud management |
|
||||
|
||||
### CLI Tools
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------------- | ------ | ------------------- |
|
||||
| `/api/cli-tools/claude-settings` | GET | Claude CLI status |
|
||||
| `/api/cli-tools/codex-settings` | GET | Codex CLI status |
|
||||
| `/api/cli-tools/droid-settings` | GET | Droid CLI status |
|
||||
| `/api/cli-tools/openclaw-settings` | GET | OpenClaw CLI status |
|
||||
| `/api/cli-tools/runtime/[toolId]` | GET | Generic CLI runtime |
|
||||
|
||||
CLI responses include: `installed`, `runnable`, `command`, `commandPath`, `runtimeMode`, `reason`.
|
||||
|
||||
### ACP Agents
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------- | ------ | -------------------------------------------------------- |
|
||||
| `/api/acp/agents` | GET | List all detected agents (built-in + custom) with status |
|
||||
| `/api/acp/agents` | POST | Add custom agent or refresh detection cache |
|
||||
| `/api/acp/agents` | DELETE | Remove a custom agent by `id` query param |
|
||||
|
||||
GET response includes `agents[]` (id, name, binary, version, installed, protocol, isCustom) and `summary` (total, installed, notFound, builtIn, custom).
|
||||
|
||||
### Resilience & Rate Limits
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------- | ------- | ------------------------------- |
|
||||
| `/api/resilience` | GET/PUT | Get/update resilience profiles |
|
||||
| `/api/resilience/reset` | POST | Reset circuit breakers |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
| `/api/rate-limit` | GET | Global rate limit configuration |
|
||||
|
||||
### Evals
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------ | -------- | --------------------------------- |
|
||||
| `/api/evals` | GET/POST | List eval suites / run evaluation |
|
||||
|
||||
### Policies
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | --------------- | ----------------------- |
|
||||
| `/api/policies` | GET/POST/DELETE | Manage routing policies |
|
||||
|
||||
### Compliance
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | ----------------------------- |
|
||||
| `/api/compliance/audit-log` | GET | Compliance audit log (last N) |
|
||||
|
||||
### v1beta (Gemini-Compatible)
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------- | ------ | --------------------------------- |
|
||||
| `/v1beta/models` | GET | List models in Gemini format |
|
||||
| `/v1beta/models/{...path}` | POST | Gemini `generateContent` endpoint |
|
||||
|
||||
These endpoints mirror Gemini's API format for clients that expect native Gemini SDK compatibility.
|
||||
|
||||
### Internal / System APIs
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | ------ | ---------------------------------------------------- |
|
||||
| `/api/init` | GET | Application initialization check (used on first run) |
|
||||
| `/api/tags` | GET | Ollama-compatible model tags (for Ollama clients) |
|
||||
| `/api/restart` | POST | Trigger graceful server restart |
|
||||
| `/api/shutdown` | POST | Trigger graceful server shutdown |
|
||||
|
||||
> **Note:** These endpoints are used internally by the system or for Ollama client compatibility. They are not typically called by end users.
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
```
|
||||
|
||||
Transcribe audio files using Deepgram or AssemblyAI.
|
||||
|
||||
**Request:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@recording.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "Hello, this is the transcribed audio content.",
|
||||
"task": "transcribe",
|
||||
"language": "en",
|
||||
"duration": 12.5
|
||||
}
|
||||
```
|
||||
|
||||
**Supported providers:** `deepgram/nova-3`, `assemblyai/best`.
|
||||
|
||||
**Supported formats:** `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
## Ollama Compatibility
|
||||
|
||||
For clients that use Ollama's API format:
|
||||
|
||||
```bash
|
||||
# Chat endpoint (Ollama format)
|
||||
POST /v1/api/chat
|
||||
|
||||
# Model listing (Ollama format)
|
||||
GET /api/tags
|
||||
```
|
||||
|
||||
Requests are automatically translated between Ollama and internal formats.
|
||||
|
||||
---
|
||||
|
||||
## Telemetry
|
||||
|
||||
```bash
|
||||
# Get latency telemetry summary (p50/p95/p99 per provider)
|
||||
GET /api/telemetry/summary
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"claudeCode": { "p50": 245, "p95": 890, "p99": 1200, "count": 150 },
|
||||
"github": { "p50": 180, "p95": 620, "p99": 950, "count": 320 }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Budget
|
||||
|
||||
```bash
|
||||
# Get budget status for all API keys
|
||||
GET /api/usage/budget
|
||||
|
||||
# Set or update a budget
|
||||
POST /api/usage/budget
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"keyId": "key-123",
|
||||
"limit": 50.00,
|
||||
"period": "monthly"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Availability
|
||||
|
||||
```bash
|
||||
# Get real-time model availability across all providers
|
||||
GET /api/models/availability
|
||||
|
||||
# Check availability for a specific model
|
||||
POST /api/models/availability
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-5-20250929"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Request Processing
|
||||
|
||||
1. Client sends request to `/v1/*`
|
||||
2. Route handler calls `handleChat`, `handleEmbedding`, `handleAudioTranscription`, or `handleImageGeneration`
|
||||
3. Model is resolved (direct provider/model or alias/combo)
|
||||
4. Credentials selected from local DB with account availability filtering
|
||||
5. For chat: `handleChatCore` — format detection, translation, cache check, idempotency check
|
||||
6. Provider executor sends upstream request
|
||||
7. Response translated back to client format (chat) or returned as-is (embeddings/images/audio)
|
||||
8. Usage/logging recorded
|
||||
9. Fallback applies on errors according to combo rules
|
||||
|
||||
Full architecture reference: [`ARCHITECTURE.md`](ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
- Dashboard routes (`/dashboard/*`) use `auth_token` cookie
|
||||
- Login uses saved password hash; fallback to `INITIAL_PASSWORD`
|
||||
- `requireLogin` toggleable via `/api/settings/require-login`
|
||||
- `/v1/*` routes optionally require Bearer API key when `REQUIRE_API_KEY=true`
|
||||
@@ -0,0 +1,787 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/ARCHITECTURE.md) · 🇪🇸 [es](../es/ARCHITECTURE.md) · 🇫🇷 [fr](../fr/ARCHITECTURE.md) · 🇩🇪 [de](../de/ARCHITECTURE.md) · 🇮🇹 [it](../it/ARCHITECTURE.md) · 🇷🇺 [ru](../ru/ARCHITECTURE.md) · 🇨🇳 [zh-CN](../zh-CN/ARCHITECTURE.md) · 🇯🇵 [ja](../ja/ARCHITECTURE.md) · 🇰🇷 [ko](../ko/ARCHITECTURE.md) · 🇸🇦 [ar](../ar/ARCHITECTURE.md) · 🇮🇳 [in](../in/ARCHITECTURE.md) · 🇹🇭 [th](../th/ARCHITECTURE.md) · 🇻🇳 [vi](../vi/ARCHITECTURE.md) · 🇮🇩 [id](../id/ARCHITECTURE.md) · 🇲🇾 [ms](../ms/ARCHITECTURE.md) · 🇳🇱 [nl](../nl/ARCHITECTURE.md) · 🇵🇱 [pl](../pl/ARCHITECTURE.md) · 🇸🇪 [sv](../sv/ARCHITECTURE.md) · 🇳🇴 [no](../no/ARCHITECTURE.md) · 🇩🇰 [da](../da/ARCHITECTURE.md) · 🇫🇮 [fi](../fi/ARCHITECTURE.md) · 🇵🇹 [pt](../pt/ARCHITECTURE.md) · 🇷🇴 [ro](../ro/ARCHITECTURE.md) · 🇭🇺 [hu](../hu/ARCHITECTURE.md) · 🇧🇬 [bg](../bg/ARCHITECTURE.md) · 🇸🇰 [sk](../sk/ARCHITECTURE.md) · 🇺🇦 [uk-UA](../uk-UA/ARCHITECTURE.md) · 🇮🇱 [he](../he/ARCHITECTURE.md) · 🇵🇭 [phi](../phi/ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Architecture
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](ARCHITECTURE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/ARCHITECTURE.md) | 🇪🇸 [Español](i18n/es/ARCHITECTURE.md) | 🇫🇷 [Français](i18n/fr/ARCHITECTURE.md) | 🇮🇹 [Italiano](i18n/it/ARCHITECTURE.md) | 🇷🇺 [Русский](i18n/ru/ARCHITECTURE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/ARCHITECTURE.md) | 🇩🇪 [Deutsch](i18n/de/ARCHITECTURE.md) | 🇮🇳 [हिन्दी](i18n/in/ARCHITECTURE.md) | 🇹🇭 [ไทย](i18n/th/ARCHITECTURE.md) | 🇺🇦 [Українська](i18n/uk-UA/ARCHITECTURE.md) | 🇸🇦 [العربية](i18n/ar/ARCHITECTURE.md) | 🇯🇵 [日本語](i18n/ja/ARCHITECTURE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/ARCHITECTURE.md) | 🇧🇬 [Български](i18n/bg/ARCHITECTURE.md) | 🇩🇰 [Dansk](i18n/da/ARCHITECTURE.md) | 🇫🇮 [Suomi](i18n/fi/ARCHITECTURE.md) | 🇮🇱 [עברית](i18n/he/ARCHITECTURE.md) | 🇭🇺 [Magyar](i18n/hu/ARCHITECTURE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/ARCHITECTURE.md) | 🇰🇷 [한국어](i18n/ko/ARCHITECTURE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/ARCHITECTURE.md) | 🇳🇱 [Nederlands](i18n/nl/ARCHITECTURE.md) | 🇳🇴 [Norsk](i18n/no/ARCHITECTURE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/ARCHITECTURE.md) | 🇷🇴 [Română](i18n/ro/ARCHITECTURE.md) | 🇵🇱 [Polski](i18n/pl/ARCHITECTURE.md) | 🇸🇰 [Slovenčina](i18n/sk/ARCHITECTURE.md) | 🇸🇪 [Svenska](i18n/sv/ARCHITECTURE.md) | 🇵🇭 [Filipino](i18n/phi/ARCHITECTURE.md)
|
||||
|
||||
_Last updated: 2026-03-04_
|
||||
|
||||
## Executive Summary
|
||||
|
||||
OmniRoute is a local AI routing gateway and dashboard built on Next.js.
|
||||
It provides a single OpenAI-compatible endpoint (`/v1/*`) and routes traffic across multiple upstream providers with translation, fallback, token refresh, and usage tracking.
|
||||
|
||||
Core capabilities:
|
||||
|
||||
- OpenAI-compatible API surface for CLI/tools (28 providers)
|
||||
- Request/response translation across provider formats
|
||||
- Model combo fallback (multi-model sequence)
|
||||
- Account-level fallback (multi-account per provider)
|
||||
- OAuth + API-key provider connection management
|
||||
- Embedding generation via `/v1/embeddings` (6 providers, 9 models)
|
||||
- Image generation via `/v1/images/generations` (4 providers, 9 models)
|
||||
- Think tag parsing (`<think>...</think>`) for reasoning models
|
||||
- Response sanitization for strict OpenAI SDK compatibility
|
||||
- Role normalization (developer→system, system→user) for cross-provider compatibility
|
||||
- Structured output conversion (json_schema → Gemini responseSchema)
|
||||
- Local persistence for providers, keys, aliases, combos, settings, pricing
|
||||
- Usage/cost tracking and request logging
|
||||
- Optional cloud sync for multi-device/state sync
|
||||
- IP allowlist/blocklist for API access control
|
||||
- Thinking budget management (passthrough/auto/custom/adaptive)
|
||||
- Global system prompt injection
|
||||
- Session tracking and fingerprinting
|
||||
- Per-account enhanced rate limiting with provider-specific profiles
|
||||
- Circuit breaker pattern for provider resilience
|
||||
- Anti-thundering herd protection with mutex locking
|
||||
- Signature-based request deduplication cache
|
||||
- Domain layer: model availability, cost rules, fallback policy, lockout policy
|
||||
- Domain state persistence (SQLite write-through cache for fallbacks, budgets, lockouts, circuit breakers)
|
||||
- Policy engine for centralized request evaluation (lockout → budget → fallback)
|
||||
- Request telemetry with p50/p95/p99 latency aggregation
|
||||
- Correlation ID (X-Request-Id) for end-to-end tracing
|
||||
- Compliance audit logging with opt-out per API key
|
||||
- Eval framework for LLM quality assurance
|
||||
- Resilience UI dashboard with real-time circuit breaker status
|
||||
- Modular OAuth providers (12 individual modules under `src/lib/oauth/providers/`)
|
||||
|
||||
Primary runtime model:
|
||||
|
||||
- Next.js app routes under `src/app/api/*` implement both dashboard APIs and compatibility APIs
|
||||
- A shared SSE/routing core in `src/sse/*` + `open-sse/*` handles provider execution, translation, streaming, fallback, and usage
|
||||
|
||||
## Scope and Boundaries
|
||||
|
||||
### In Scope
|
||||
|
||||
- Local gateway runtime
|
||||
- Dashboard management APIs
|
||||
- Provider authentication and token refresh
|
||||
- Request translation and SSE streaming
|
||||
- Local state + usage persistence
|
||||
- Optional cloud sync orchestration
|
||||
|
||||
### Out of Scope
|
||||
|
||||
- Cloud service implementation behind `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Provider SLA/control plane outside local process
|
||||
- External CLI binaries themselves (Claude CLI, Codex CLI, etc.)
|
||||
|
||||
## High-Level System Context
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Clients[Developer Clients]
|
||||
C1[Claude Code]
|
||||
C2[Codex CLI]
|
||||
C3[OpenClaw / Droid / Cline / Continue / Roo]
|
||||
C4[Custom OpenAI-compatible clients]
|
||||
BROWSER[Browser Dashboard]
|
||||
end
|
||||
|
||||
subgraph Router[OmniRoute Local Process]
|
||||
API[V1 Compatibility API\n/v1/*]
|
||||
DASH[Dashboard + Management API\n/api/*]
|
||||
CORE[SSE + Translation Core\nopen-sse + src/sse]
|
||||
DB[(storage.sqlite)]
|
||||
UDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph Upstreams[Upstream Providers]
|
||||
P1[OAuth Providers\nClaude/Codex/Gemini/Qwen/iFlow/GitHub/Kiro/Cursor/Antigravity]
|
||||
P2[API Key Providers\nOpenAI/Anthropic/OpenRouter/GLM/Kimi/MiniMax\nDeepSeek/Groq/xAI/Mistral/Perplexity\nTogether/Fireworks/Cerebras/Cohere/NVIDIA]
|
||||
P3[Compatible Nodes\nOpenAI-compatible / Anthropic-compatible]
|
||||
end
|
||||
|
||||
subgraph Cloud[Optional Cloud Sync]
|
||||
CLOUD[Cloud Sync Endpoint\nNEXT_PUBLIC_CLOUD_URL]
|
||||
end
|
||||
|
||||
C1 --> API
|
||||
C2 --> API
|
||||
C3 --> API
|
||||
C4 --> API
|
||||
BROWSER --> DASH
|
||||
|
||||
API --> CORE
|
||||
DASH --> DB
|
||||
CORE --> DB
|
||||
CORE --> UDB
|
||||
|
||||
CORE --> P1
|
||||
CORE --> P2
|
||||
CORE --> P3
|
||||
|
||||
DASH --> CLOUD
|
||||
```
|
||||
|
||||
## Core Runtime Components
|
||||
|
||||
## 1) API and Routing Layer (Next.js App Routes)
|
||||
|
||||
Main directories:
|
||||
|
||||
- `src/app/api/v1/*` and `src/app/api/v1beta/*` for compatibility APIs
|
||||
- `src/app/api/*` for management/configuration APIs
|
||||
- Next rewrites in `next.config.mjs` map `/v1/*` to `/api/v1/*`
|
||||
|
||||
Important compatibility routes:
|
||||
|
||||
- `src/app/api/v1/chat/completions/route.ts`
|
||||
- `src/app/api/v1/messages/route.ts`
|
||||
- `src/app/api/v1/responses/route.ts`
|
||||
- `src/app/api/v1/models/route.ts` — includes custom models with `custom: true`
|
||||
- `src/app/api/v1/embeddings/route.ts` — embedding generation (6 providers)
|
||||
- `src/app/api/v1/images/generations/route.ts` — image generation (4+ providers incl. Antigravity/Nebius)
|
||||
- `src/app/api/v1/messages/count_tokens/route.ts`
|
||||
- `src/app/api/v1/providers/[provider]/chat/completions/route.ts` — dedicated per-provider chat
|
||||
- `src/app/api/v1/providers/[provider]/embeddings/route.ts` — dedicated per-provider embeddings
|
||||
- `src/app/api/v1/providers/[provider]/images/generations/route.ts` — dedicated per-provider images
|
||||
- `src/app/api/v1beta/models/route.ts`
|
||||
- `src/app/api/v1beta/models/[...path]/route.ts`
|
||||
|
||||
Management domains:
|
||||
|
||||
- Auth/settings: `src/app/api/auth/*`, `src/app/api/settings/*`
|
||||
- Providers/connections: `src/app/api/providers*`
|
||||
- Provider nodes: `src/app/api/provider-nodes*`
|
||||
- Custom models: `src/app/api/provider-models` (GET/POST/DELETE)
|
||||
- Model catalog: `src/app/api/models/route.ts` (GET)
|
||||
- Proxy config: `src/app/api/settings/proxy` (GET/PUT/DELETE) + `src/app/api/settings/proxy/test` (POST)
|
||||
- OAuth: `src/app/api/oauth/*`
|
||||
- Keys/aliases/combos/pricing: `src/app/api/keys*`, `src/app/api/models/alias`, `src/app/api/combos*`, `src/app/api/pricing`
|
||||
- Usage: `src/app/api/usage/*`
|
||||
- Sync/cloud: `src/app/api/sync/*`, `src/app/api/cloud/*`
|
||||
- CLI tooling helpers: `src/app/api/cli-tools/*`
|
||||
- IP filter: `src/app/api/settings/ip-filter` (GET/PUT)
|
||||
- Thinking budget: `src/app/api/settings/thinking-budget` (GET/PUT)
|
||||
- System prompt: `src/app/api/settings/system-prompt` (GET/PUT)
|
||||
- Sessions: `src/app/api/sessions` (GET)
|
||||
- Rate limits: `src/app/api/rate-limits` (GET)
|
||||
- Resilience: `src/app/api/resilience` (GET/PATCH) — provider profiles, circuit breaker, rate limit state
|
||||
- Resilience reset: `src/app/api/resilience/reset` (POST) — reset breakers + cooldowns
|
||||
- Cache stats: `src/app/api/cache/stats` (GET/DELETE)
|
||||
- Model availability: `src/app/api/models/availability` (GET/POST)
|
||||
- Telemetry: `src/app/api/telemetry/summary` (GET)
|
||||
- Budget: `src/app/api/usage/budget` (GET/POST)
|
||||
- Fallback chains: `src/app/api/fallback/chains` (GET/POST/DELETE)
|
||||
- Compliance audit: `src/app/api/compliance/audit-log` (GET)
|
||||
- Evals: `src/app/api/evals` (GET/POST), `src/app/api/evals/[suiteId]` (GET)
|
||||
- Policies: `src/app/api/policies` (GET/POST)
|
||||
|
||||
## 2) SSE + Translation Core
|
||||
|
||||
Main flow modules:
|
||||
|
||||
- Entry: `src/sse/handlers/chat.ts`
|
||||
- Core orchestration: `open-sse/handlers/chatCore.ts`
|
||||
- Provider execution adapters: `open-sse/executors/*`
|
||||
- Format detection/provider config: `open-sse/services/provider.ts`
|
||||
- Model parse/resolve: `src/sse/services/model.ts`, `open-sse/services/model.ts`
|
||||
- Account fallback logic: `open-sse/services/accountFallback.ts`
|
||||
- Translation registry: `open-sse/translator/index.ts`
|
||||
- Stream transformations: `open-sse/utils/stream.ts`, `open-sse/utils/streamHandler.ts`
|
||||
- Usage extraction/normalization: `open-sse/utils/usageTracking.ts`
|
||||
- Think tag parser: `open-sse/utils/thinkTagParser.ts`
|
||||
- Embedding handler: `open-sse/handlers/embeddings.ts`
|
||||
- Embedding provider registry: `open-sse/config/embeddingRegistry.ts`
|
||||
- Image generation handler: `open-sse/handlers/imageGeneration.ts`
|
||||
- Image provider registry: `open-sse/config/imageRegistry.ts`
|
||||
- Response sanitization: `open-sse/handlers/responseSanitizer.ts`
|
||||
- Role normalization: `open-sse/services/roleNormalizer.ts`
|
||||
|
||||
Services (business logic):
|
||||
|
||||
- Account selection/scoring: `open-sse/services/accountSelector.ts`
|
||||
- Context lifecycle management: `open-sse/services/contextManager.ts`
|
||||
- IP filter enforcement: `open-sse/services/ipFilter.ts`
|
||||
- Session tracking: `open-sse/services/sessionManager.ts`
|
||||
- Request deduplication: `open-sse/services/signatureCache.ts`
|
||||
- System prompt injection: `open-sse/services/systemPrompt.ts`
|
||||
- Thinking budget management: `open-sse/services/thinkingBudget.ts`
|
||||
- Wildcard model routing: `open-sse/services/wildcardRouter.ts`
|
||||
- Rate limit management: `open-sse/services/rateLimitManager.ts`
|
||||
- Circuit breaker: `open-sse/services/circuitBreaker.ts`
|
||||
|
||||
Domain layer modules:
|
||||
|
||||
- Model availability: `src/lib/domain/modelAvailability.ts`
|
||||
- Cost rules/budgets: `src/lib/domain/costRules.ts`
|
||||
- Fallback policy: `src/lib/domain/fallbackPolicy.ts`
|
||||
- Combo resolver: `src/lib/domain/comboResolver.ts`
|
||||
- Lockout policy: `src/lib/domain/lockoutPolicy.ts`
|
||||
- Policy engine: `src/domain/policyEngine.ts` — centralized lockout → budget → fallback evaluation
|
||||
- Error codes catalog: `src/lib/domain/errorCodes.ts`
|
||||
- Request ID: `src/lib/domain/requestId.ts`
|
||||
- Fetch timeout: `src/lib/domain/fetchTimeout.ts`
|
||||
- Request telemetry: `src/lib/domain/requestTelemetry.ts`
|
||||
- Compliance/audit: `src/lib/domain/compliance/index.ts`
|
||||
- Eval runner: `src/lib/domain/evalRunner.ts`
|
||||
- Domain state persistence: `src/lib/db/domainState.ts` — SQLite CRUD for fallback chains, budgets, cost history, lockout state, circuit breakers
|
||||
|
||||
OAuth provider modules (12 individual files under `src/lib/oauth/providers/`):
|
||||
|
||||
- Registry index: `src/lib/oauth/providers/index.ts`
|
||||
- Individual providers: `claude.ts`, `codex.ts`, `gemini.ts`, `antigravity.ts`, `iflow.ts`, `qwen.ts`, `kimi-coding.ts`, `github.ts`, `kiro.ts`, `cursor.ts`, `kilocode.ts`, `cline.ts`
|
||||
- Thin wrapper: `src/lib/oauth/providers.ts` — re-exports from individual modules
|
||||
|
||||
## 3) Persistence Layer
|
||||
|
||||
Primary state DB (SQLite):
|
||||
|
||||
- Core infra: `src/lib/db/core.ts` (better-sqlite3, migrations, WAL)
|
||||
- Re-export facade: `src/lib/localDb.ts` (thin compatibility layer for callers)
|
||||
- file: `${DATA_DIR}/storage.sqlite` (or `$XDG_CONFIG_HOME/omniroute/storage.sqlite` when set, else `~/.omniroute/storage.sqlite`)
|
||||
- entities (tables + KV namespaces): providerConnections, providerNodes, modelAliases, combos, apiKeys, settings, pricing, **customModels**, **proxyConfig**, **ipFilter**, **thinkingBudget**, **systemPrompt**
|
||||
|
||||
Usage persistence:
|
||||
|
||||
- facade: `src/lib/usageDb.ts` (decomposed modules in `src/lib/usage/*`)
|
||||
- SQLite tables in `storage.sqlite`: `usage_history`, `call_logs`, `proxy_logs`
|
||||
- optional file artifacts remain for compatibility/debug (`${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`, `<repo>/logs/...`)
|
||||
- legacy JSON files are migrated to SQLite by startup migrations when present
|
||||
|
||||
Domain State DB (SQLite):
|
||||
|
||||
- `src/lib/db/domainState.ts` — CRUD operations for domain state
|
||||
- Tables (created in `src/lib/db/core.ts`): `domain_fallback_chains`, `domain_budgets`, `domain_cost_history`, `domain_lockout_state`, `domain_circuit_breakers`
|
||||
- Write-through cache pattern: in-memory Maps are authoritative at runtime; mutations are written synchronously to SQLite; state is restored from DB on cold start
|
||||
|
||||
## 4) Auth + Security Surfaces
|
||||
|
||||
- Dashboard cookie auth: `src/proxy.ts`, `src/app/api/auth/login/route.ts`
|
||||
- API key generation/verification: `src/shared/utils/apiKey.ts`
|
||||
- Provider secrets persisted in `providerConnections` entries
|
||||
- Outbound proxy support via `open-sse/utils/proxyFetch.ts` (env vars) and `open-sse/utils/networkProxy.ts` (configurable per-provider or global)
|
||||
|
||||
## 5) Cloud Sync
|
||||
|
||||
- Scheduler init: `src/lib/initCloudSync.ts`, `src/shared/services/initializeCloudSync.ts`
|
||||
- Periodic task: `src/shared/services/cloudSyncScheduler.ts`
|
||||
- Control route: `src/app/api/sync/cloud/route.ts`
|
||||
|
||||
## Request Lifecycle (`/v1/chat/completions`)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant Client as CLI/SDK Client
|
||||
participant Route as /api/v1/chat/completions
|
||||
participant Chat as src/sse/handlers/chat
|
||||
participant Core as open-sse/handlers/chatCore
|
||||
participant Model as Model Resolver
|
||||
participant Auth as Credential Selector
|
||||
participant Exec as Provider Executor
|
||||
participant Prov as Upstream Provider
|
||||
participant Stream as Stream Translator
|
||||
participant Usage as usageDb
|
||||
|
||||
Client->>Route: POST /v1/chat/completions
|
||||
Route->>Chat: handleChat(request)
|
||||
Chat->>Model: parse/resolve model or combo
|
||||
|
||||
alt Combo model
|
||||
Chat->>Chat: iterate combo models (handleComboChat)
|
||||
end
|
||||
|
||||
Chat->>Auth: getProviderCredentials(provider)
|
||||
Auth-->>Chat: active account + tokens/api key
|
||||
|
||||
Chat->>Core: handleChatCore(body, modelInfo, credentials)
|
||||
Core->>Core: detect source format
|
||||
Core->>Core: translate request to target format
|
||||
Core->>Exec: execute(provider, transformedBody)
|
||||
Exec->>Prov: upstream API call
|
||||
Prov-->>Exec: SSE/JSON response
|
||||
Exec-->>Core: response + metadata
|
||||
|
||||
alt 401/403
|
||||
Core->>Exec: refreshCredentials()
|
||||
Exec-->>Core: updated tokens
|
||||
Core->>Exec: retry request
|
||||
end
|
||||
|
||||
Core->>Stream: translate/normalize stream to client format
|
||||
Stream-->>Client: SSE chunks / JSON response
|
||||
|
||||
Stream->>Usage: extract usage + persist history/log
|
||||
```
|
||||
|
||||
## Combo + Account Fallback Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A[Incoming model string] --> B{Is combo name?}
|
||||
B -- Yes --> C[Load combo models sequence]
|
||||
B -- No --> D[Single model path]
|
||||
|
||||
C --> E[Try model N]
|
||||
E --> F[Resolve provider/model]
|
||||
D --> F
|
||||
|
||||
F --> G[Select account credentials]
|
||||
G --> H{Credentials available?}
|
||||
H -- No --> I[Return provider unavailable]
|
||||
H -- Yes --> J[Execute request]
|
||||
|
||||
J --> K{Success?}
|
||||
K -- Yes --> L[Return response]
|
||||
K -- No --> M{Fallback-eligible error?}
|
||||
|
||||
M -- No --> N[Return error]
|
||||
M -- Yes --> O[Mark account unavailable cooldown]
|
||||
O --> P{Another account for provider?}
|
||||
P -- Yes --> G
|
||||
P -- No --> Q{In combo with next model?}
|
||||
Q -- Yes --> E
|
||||
Q -- No --> R[Return all unavailable]
|
||||
```
|
||||
|
||||
Fallback decisions are driven by `open-sse/services/accountFallback.ts` using status codes and error-message heuristics.
|
||||
|
||||
## OAuth Onboarding and Token Refresh Lifecycle
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Dashboard UI
|
||||
participant OAuth as /api/oauth/[provider]/[action]
|
||||
participant ProvAuth as Provider Auth Server
|
||||
participant DB as localDb
|
||||
participant Test as /api/providers/[id]/test
|
||||
participant Exec as Provider Executor
|
||||
|
||||
UI->>OAuth: GET authorize or device-code
|
||||
OAuth->>ProvAuth: create auth/device flow
|
||||
ProvAuth-->>OAuth: auth URL or device code payload
|
||||
OAuth-->>UI: flow data
|
||||
|
||||
UI->>OAuth: POST exchange or poll
|
||||
OAuth->>ProvAuth: token exchange/poll
|
||||
ProvAuth-->>OAuth: access/refresh tokens
|
||||
OAuth->>DB: createProviderConnection(oauth data)
|
||||
OAuth-->>UI: success + connection id
|
||||
|
||||
UI->>Test: POST /api/providers/[id]/test
|
||||
Test->>Exec: validate credentials / optional refresh
|
||||
Exec-->>Test: valid or refreshed token info
|
||||
Test->>DB: update status/tokens/errors
|
||||
Test-->>UI: validation result
|
||||
```
|
||||
|
||||
Refresh during live traffic is executed inside `open-sse/handlers/chatCore.ts` via executor `refreshCredentials()`.
|
||||
|
||||
## Cloud Sync Lifecycle (Enable / Sync / Disable)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Endpoint Page UI
|
||||
participant Sync as /api/sync/cloud
|
||||
participant DB as localDb
|
||||
participant Cloud as External Cloud Sync
|
||||
participant Claude as ~/.claude/settings.json
|
||||
|
||||
UI->>Sync: POST action=enable
|
||||
Sync->>DB: set cloudEnabled=true
|
||||
Sync->>DB: ensure API key exists
|
||||
Sync->>Cloud: POST /sync/{machineId} (providers/aliases/combos/keys)
|
||||
Cloud-->>Sync: sync result
|
||||
Sync->>Cloud: GET /{machineId}/v1/verify
|
||||
Sync-->>UI: enabled + verification status
|
||||
|
||||
UI->>Sync: POST action=sync
|
||||
Sync->>Cloud: POST /sync/{machineId}
|
||||
Cloud-->>Sync: remote data
|
||||
Sync->>DB: update newer local tokens/status
|
||||
Sync-->>UI: synced
|
||||
|
||||
UI->>Sync: POST action=disable
|
||||
Sync->>DB: set cloudEnabled=false
|
||||
Sync->>Cloud: DELETE /sync/{machineId}
|
||||
Sync->>Claude: switch ANTHROPIC_BASE_URL back to local (if needed)
|
||||
Sync-->>UI: disabled
|
||||
```
|
||||
|
||||
Periodic sync is triggered by `CloudSyncScheduler` when cloud is enabled.
|
||||
|
||||
## Data Model and Storage Map
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
SETTINGS ||--o{ PROVIDER_CONNECTION : controls
|
||||
PROVIDER_NODE ||--o{ PROVIDER_CONNECTION : backs_compatible_provider
|
||||
PROVIDER_CONNECTION ||--o{ USAGE_ENTRY : emits_usage
|
||||
|
||||
SETTINGS {
|
||||
boolean cloudEnabled
|
||||
number stickyRoundRobinLimit
|
||||
boolean requireLogin
|
||||
string password_hash
|
||||
string fallbackStrategy
|
||||
json rateLimitDefaults
|
||||
json providerProfiles
|
||||
}
|
||||
|
||||
PROVIDER_CONNECTION {
|
||||
string id
|
||||
string provider
|
||||
string authType
|
||||
string name
|
||||
number priority
|
||||
boolean isActive
|
||||
string apiKey
|
||||
string accessToken
|
||||
string refreshToken
|
||||
string expiresAt
|
||||
string testStatus
|
||||
string lastError
|
||||
string rateLimitedUntil
|
||||
json providerSpecificData
|
||||
}
|
||||
|
||||
PROVIDER_NODE {
|
||||
string id
|
||||
string type
|
||||
string name
|
||||
string prefix
|
||||
string apiType
|
||||
string baseUrl
|
||||
}
|
||||
|
||||
MODEL_ALIAS {
|
||||
string alias
|
||||
string targetModel
|
||||
}
|
||||
|
||||
COMBO {
|
||||
string id
|
||||
string name
|
||||
string[] models
|
||||
}
|
||||
|
||||
API_KEY {
|
||||
string id
|
||||
string name
|
||||
string key
|
||||
string machineId
|
||||
}
|
||||
|
||||
USAGE_ENTRY {
|
||||
string provider
|
||||
string model
|
||||
number prompt_tokens
|
||||
number completion_tokens
|
||||
string connectionId
|
||||
string timestamp
|
||||
}
|
||||
|
||||
CUSTOM_MODEL {
|
||||
string id
|
||||
string name
|
||||
string providerId
|
||||
}
|
||||
|
||||
PROXY_CONFIG {
|
||||
string global
|
||||
json providers
|
||||
}
|
||||
|
||||
IP_FILTER {
|
||||
string mode
|
||||
string[] allowlist
|
||||
string[] blocklist
|
||||
}
|
||||
|
||||
THINKING_BUDGET {
|
||||
string mode
|
||||
number customBudget
|
||||
string effortLevel
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT {
|
||||
boolean enabled
|
||||
string prompt
|
||||
string position
|
||||
}
|
||||
```
|
||||
|
||||
Physical storage files:
|
||||
|
||||
- primary runtime DB: `${DATA_DIR}/storage.sqlite`
|
||||
- request log lines: `${DATA_DIR}/log.txt` (compat/debug artifact)
|
||||
- structured call payload archives: `${DATA_DIR}/call_logs/`
|
||||
- optional translator/request debug sessions: `<repo>/logs/...`
|
||||
|
||||
## Deployment Topology
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph LocalHost[Developer Host]
|
||||
CLI[CLI Tools]
|
||||
Browser[Dashboard Browser]
|
||||
end
|
||||
|
||||
subgraph ContainerOrProcess[OmniRoute Runtime]
|
||||
Next[Next.js Server\nPORT=20128]
|
||||
Core[SSE Core + Executors]
|
||||
MainDB[(storage.sqlite)]
|
||||
UsageDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph External[External Services]
|
||||
Providers[AI Providers]
|
||||
SyncCloud[Cloud Sync Service]
|
||||
end
|
||||
|
||||
CLI --> Next
|
||||
Browser --> Next
|
||||
Next --> Core
|
||||
Next --> MainDB
|
||||
Core --> MainDB
|
||||
Core --> UsageDB
|
||||
Core --> Providers
|
||||
Next --> SyncCloud
|
||||
```
|
||||
|
||||
## Module Mapping (Decision-Critical)
|
||||
|
||||
### Route and API Modules
|
||||
|
||||
- `src/app/api/v1/*`, `src/app/api/v1beta/*`: compatibility APIs
|
||||
- `src/app/api/v1/providers/[provider]/*`: dedicated per-provider routes (chat, embeddings, images)
|
||||
- `src/app/api/providers*`: provider CRUD, validation, testing
|
||||
- `src/app/api/provider-nodes*`: custom compatible node management
|
||||
- `src/app/api/provider-models`: custom model management (CRUD)
|
||||
- `src/app/api/models/route.ts`: model catalog API (aliases + custom models)
|
||||
- `src/app/api/oauth/*`: OAuth/device-code flows
|
||||
- `src/app/api/keys*`: local API key lifecycle
|
||||
- `src/app/api/models/alias`: alias management
|
||||
- `src/app/api/combos*`: fallback combo management
|
||||
- `src/app/api/pricing`: pricing overrides for cost calculation
|
||||
- `src/app/api/settings/proxy`: proxy configuration (GET/PUT/DELETE)
|
||||
- `src/app/api/settings/proxy/test`: outbound proxy connectivity test (POST)
|
||||
- `src/app/api/usage/*`: usage and logs APIs
|
||||
- `src/app/api/sync/*` + `src/app/api/cloud/*`: cloud sync and cloud-facing helpers
|
||||
- `src/app/api/cli-tools/*`: local CLI config writers/checkers
|
||||
- `src/app/api/settings/ip-filter`: IP allowlist/blocklist (GET/PUT)
|
||||
- `src/app/api/settings/thinking-budget`: thinking token budget config (GET/PUT)
|
||||
- `src/app/api/settings/system-prompt`: global system prompt (GET/PUT)
|
||||
- `src/app/api/sessions`: active session listing (GET)
|
||||
- `src/app/api/rate-limits`: per-account rate limit status (GET)
|
||||
|
||||
### Routing and Execution Core
|
||||
|
||||
- `src/sse/handlers/chat.ts`: request parse, combo handling, account selection loop
|
||||
- `open-sse/handlers/chatCore.ts`: translation, executor dispatch, retry/refresh handling, stream setup
|
||||
- `open-sse/executors/*`: provider-specific network and format behavior
|
||||
|
||||
### Translation Registry and Format Converters
|
||||
|
||||
- `open-sse/translator/index.ts`: translator registry and orchestration
|
||||
- Request translators: `open-sse/translator/request/*`
|
||||
- Response translators: `open-sse/translator/response/*`
|
||||
- Format constants: `open-sse/translator/formats.ts`
|
||||
|
||||
### Persistence
|
||||
|
||||
- `src/lib/db/*`: persistent config/state and domain persistence on SQLite
|
||||
- `src/lib/localDb.ts`: compatibility re-export for DB modules
|
||||
- `src/lib/usageDb.ts`: usage history/call logs facade on top of SQLite tables
|
||||
|
||||
## Provider Executor Coverage (Strategy Pattern)
|
||||
|
||||
Each provider has a specialized executor extending `BaseExecutor` (in `open-sse/executors/base.ts`), which provides URL building, header construction, retry with exponential backoff, credential refresh hooks, and the `execute()` orchestration method.
|
||||
|
||||
| Executor | Provider(s) | Special Handling |
|
||||
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------- |
|
||||
| `DefaultExecutor` | OpenAI, Claude, Gemini, Qwen, iFlow, OpenRouter, GLM, Kimi, MiniMax, DeepSeek, Groq, xAI, Mistral, Perplexity, Together, Fireworks, Cerebras, Cohere, NVIDIA | Dynamic URL/header config per provider |
|
||||
| `AntigravityExecutor` | Google Antigravity | Custom project/session IDs, Retry-After parsing |
|
||||
| `CodexExecutor` | OpenAI Codex | Injects system instructions, forces reasoning effort |
|
||||
| `CursorExecutor` | Cursor IDE | ConnectRPC protocol, Protobuf encoding, request signing via checksum |
|
||||
| `GithubExecutor` | GitHub Copilot | Copilot token refresh, VSCode-mimicking headers |
|
||||
| `KiroExecutor` | AWS CodeWhisperer/Kiro | AWS EventStream binary format → SSE conversion |
|
||||
| `GeminiCLIExecutor` | Gemini CLI | Google OAuth token refresh cycle |
|
||||
|
||||
All other providers (including custom compatible nodes) use the `DefaultExecutor`.
|
||||
|
||||
## Provider Compatibility Matrix
|
||||
|
||||
| Provider | Format | Auth | Stream | Non-Stream | Token Refresh | Usage API |
|
||||
| ---------------- | ---------------- | --------------------- | ---------------- | ---------- | ------------- | ------------------ |
|
||||
| Claude | claude | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Admin only |
|
||||
| Gemini | gemini | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Gemini CLI | gemini-cli | OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Antigravity | antigravity | OAuth | ✅ | ✅ | ✅ | ✅ Full quota API |
|
||||
| OpenAI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Codex | openai-responses | OAuth | ✅ forced | ❌ | ✅ | ✅ Rate limits |
|
||||
| GitHub Copilot | openai | OAuth + Copilot Token | ✅ | ✅ | ✅ | ✅ Quota snapshots |
|
||||
| Cursor | cursor | Custom checksum | ✅ | ✅ | ❌ | ❌ |
|
||||
| Kiro | kiro | AWS SSO OIDC | ✅ (EventStream) | ❌ | ✅ | ✅ Usage limits |
|
||||
| Qwen | openai | OAuth | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| iFlow | openai | OAuth (Basic) | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| OpenRouter | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| GLM/Kimi/MiniMax | claude | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| DeepSeek | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Groq | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| xAI (Grok) | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Mistral | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Perplexity | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Together AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Fireworks AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cerebras | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cohere | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| NVIDIA NIM | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
|
||||
## Format Translation Coverage
|
||||
|
||||
Detected source formats include:
|
||||
|
||||
- `openai`
|
||||
- `openai-responses`
|
||||
- `claude`
|
||||
- `gemini`
|
||||
|
||||
Target formats include:
|
||||
|
||||
- OpenAI chat/Responses
|
||||
- Claude
|
||||
- Gemini/Gemini-CLI/Antigravity envelope
|
||||
- Kiro
|
||||
- Cursor
|
||||
|
||||
Translations use **OpenAI as the hub format** — all conversions go through OpenAI as intermediate:
|
||||
|
||||
```
|
||||
Source Format → OpenAI (hub) → Target Format
|
||||
```
|
||||
|
||||
Translations are selected dynamically based on source payload shape and provider target format.
|
||||
|
||||
Additional processing layers in the translation pipeline:
|
||||
|
||||
- **Response sanitization** — Strips non-standard fields from OpenAI-format responses (both streaming and non-streaming) to ensure strict SDK compliance
|
||||
- **Role normalization** — Converts `developer` → `system` for non-OpenAI targets; merges `system` → `user` for models that reject the system role (GLM, ERNIE)
|
||||
- **Think tag extraction** — Parses `<think>...</think>` blocks from content into `reasoning_content` field
|
||||
- **Structured output** — Converts OpenAI `response_format.json_schema` to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
## Supported API Endpoints
|
||||
|
||||
| Endpoint | Format | Handler |
|
||||
| -------------------------------------------------- | ------------------ | ---------------------------------------------------- |
|
||||
| `POST /v1/chat/completions` | OpenAI Chat | `src/sse/handlers/chat.ts` |
|
||||
| `POST /v1/messages` | Claude Messages | Same handler (auto-detected) |
|
||||
| `POST /v1/responses` | OpenAI Responses | `open-sse/handlers/responsesHandler.ts` |
|
||||
| `POST /v1/embeddings` | OpenAI Embeddings | `open-sse/handlers/embeddings.ts` |
|
||||
| `GET /v1/embeddings` | Model listing | API route |
|
||||
| `POST /v1/images/generations` | OpenAI Images | `open-sse/handlers/imageGeneration.ts` |
|
||||
| `GET /v1/images/generations` | Model listing | API route |
|
||||
| `POST /v1/providers/{provider}/chat/completions` | OpenAI Chat | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/embeddings` | OpenAI Embeddings | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/images/generations` | OpenAI Images | Dedicated per-provider with model validation |
|
||||
| `POST /v1/messages/count_tokens` | Claude Token Count | API route |
|
||||
| `GET /v1/models` | OpenAI Models list | API route (chat + embedding + image + custom models) |
|
||||
| `GET /api/models/catalog` | Catalog | All models grouped by provider + type |
|
||||
| `POST /v1beta/models/*:streamGenerateContent` | Gemini native | API route |
|
||||
| `GET/PUT/DELETE /api/settings/proxy` | Proxy Config | Network proxy configuration |
|
||||
| `POST /api/settings/proxy/test` | Proxy Connectivity | Proxy health/connectivity test endpoint |
|
||||
| `GET/POST/DELETE /api/provider-models` | Custom Models | Custom model management per provider |
|
||||
|
||||
## Bypass Handler
|
||||
|
||||
The bypass handler (`open-sse/utils/bypassHandler.ts`) intercepts known "throwaway" requests from Claude CLI — warmup pings, title extractions, and token counts — and returns a **fake response** without consuming upstream provider tokens. This is triggered only when `User-Agent` contains `claude-cli`.
|
||||
|
||||
## Request Logger Pipeline
|
||||
|
||||
The request logger (`open-sse/utils/requestLogger.ts`) provides a 7-stage debug logging pipeline, disabled by default, enabled via `ENABLE_REQUEST_LOGS=true`:
|
||||
|
||||
```
|
||||
1_req_client.json → 2_req_source.json → 3_req_openai.json → 4_req_target.json
|
||||
→ 5_res_provider.txt → 6_res_openai.txt → 7_res_client.txt
|
||||
```
|
||||
|
||||
Files are written to `<repo>/logs/<session>/` for each request session.
|
||||
|
||||
## Failure Modes and Resilience
|
||||
|
||||
## 1) Account/Provider Availability
|
||||
|
||||
- provider account cooldown on transient/rate/auth errors
|
||||
- account fallback before failing request
|
||||
- combo model fallback when current model/provider path is exhausted
|
||||
|
||||
## 2) Token Expiry
|
||||
|
||||
- pre-check and refresh with retry for refreshable providers
|
||||
- 401/403 retry after refresh attempt in core path
|
||||
|
||||
## 3) Stream Safety
|
||||
|
||||
- disconnect-aware stream controller
|
||||
- translation stream with end-of-stream flush and `[DONE]` handling
|
||||
- usage estimation fallback when provider usage metadata is missing
|
||||
|
||||
## 4) Cloud Sync Degradation
|
||||
|
||||
- sync errors are surfaced but local runtime continues
|
||||
- scheduler has retry-capable logic, but periodic execution currently calls single-attempt sync by default
|
||||
|
||||
## 5) Data Integrity
|
||||
|
||||
- SQLite schema migrations and auto-upgrade hooks at startup
|
||||
- legacy JSON → SQLite migration compatibility path
|
||||
|
||||
## Observability and Operational Signals
|
||||
|
||||
Runtime visibility sources:
|
||||
|
||||
- console logs from `src/sse/utils/logger.ts`
|
||||
- per-request usage aggregates in SQLite (`usage_history`, `call_logs`, `proxy_logs`)
|
||||
- textual request status log in `log.txt` (optional/compat)
|
||||
- optional deep request/translation logs under `logs/` when `ENABLE_REQUEST_LOGS=true`
|
||||
- dashboard usage endpoints (`/api/usage/*`) for UI consumption
|
||||
|
||||
## Security-Sensitive Boundaries
|
||||
|
||||
- JWT secret (`JWT_SECRET`) secures dashboard session cookie verification/signing
|
||||
- Initial password bootstrap (`INITIAL_PASSWORD`) should be explicitly configured for first-run provisioning
|
||||
- API key HMAC secret (`API_KEY_SECRET`) secures generated local API key format
|
||||
- Provider secrets (API keys/tokens) are persisted in local DB and should be protected at filesystem level
|
||||
- Cloud sync endpoints rely on API key auth + machine id semantics
|
||||
|
||||
## Environment and Runtime Matrix
|
||||
|
||||
Environment variables actively used by code:
|
||||
|
||||
- App/auth: `JWT_SECRET`, `INITIAL_PASSWORD`
|
||||
- Storage: `DATA_DIR`
|
||||
- Compatible node behavior: `ALLOW_MULTI_CONNECTIONS_PER_COMPAT_NODE`
|
||||
- Optional storage base override (Linux/macOS when `DATA_DIR` unset): `XDG_CONFIG_HOME`
|
||||
- Security hashing: `API_KEY_SECRET`, `MACHINE_ID_SALT`
|
||||
- Logging: `ENABLE_REQUEST_LOGS`
|
||||
- Sync/cloud URLing: `NEXT_PUBLIC_BASE_URL`, `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Outbound proxy: `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY` and lowercase variants
|
||||
- SOCKS5 feature flags: `ENABLE_SOCKS5_PROXY`, `NEXT_PUBLIC_ENABLE_SOCKS5_PROXY`
|
||||
- Platform/runtime helpers (not app-specific config): `APPDATA`, `NODE_ENV`, `PORT`, `HOSTNAME`
|
||||
|
||||
## Known Architectural Notes
|
||||
|
||||
1. `usageDb` and `localDb` share the same base directory policy (`DATA_DIR` -> `XDG_CONFIG_HOME/omniroute` -> `~/.omniroute`) with legacy file migration.
|
||||
2. `/api/v1/route.ts` delegates to the same unified catalog builder used by `/api/v1/models` (`src/app/api/v1/models/catalog.ts`) to avoid semantic drift.
|
||||
3. Request logger writes full headers/body when enabled; treat log directory as sensitive.
|
||||
4. Cloud behavior depends on correct `NEXT_PUBLIC_BASE_URL` and cloud endpoint reachability.
|
||||
5. The `open-sse/` directory is published as the `@omniroute/open-sse` **npm workspace package**. Source code imports it via `@omniroute/open-sse/...` (resolved by Next.js `transpilePackages`). File paths in this document still use the directory name `open-sse/` for consistency.
|
||||
6. Charts in the dashboard use **Recharts** (SVG-based) for accessible, interactive analytics visualizations (model usage bar charts, provider breakdown tables with success rates).
|
||||
7. E2E tests use **Playwright** (`tests/e2e/`), run via `npm run test:e2e`. Unit tests use **Node.js test runner** (`tests/unit/`), run via `npm run test:unit`. Source code under `src/` is **TypeScript** (`.ts`/`.tsx`); the `open-sse/` workspace remains JavaScript (`.js`).
|
||||
8. Settings page is organized into 5 tabs: Security, Routing (6 global strategies: fill-first, round-robin, p2c, random, least-used, cost-optimized), Resilience (editable rate limits, circuit breaker, policies), AI (thinking budget, system prompt, prompt cache), Advanced (proxy).
|
||||
|
||||
## Operational Verification Checklist
|
||||
|
||||
- Build from source: `npm run build`
|
||||
- Build Docker image: `docker build -t omniroute .`
|
||||
- Start service and verify:
|
||||
- `GET /api/settings`
|
||||
- `GET /api/v1/models`
|
||||
- CLI target base URL should be `http://<host>:20128/v1` when `PORT=20128`
|
||||
@@ -0,0 +1,67 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/AUTO-COMBO.md) · 🇪🇸 [es](../es/AUTO-COMBO.md) · 🇫🇷 [fr](../fr/AUTO-COMBO.md) · 🇩🇪 [de](../de/AUTO-COMBO.md) · 🇮🇹 [it](../it/AUTO-COMBO.md) · 🇷🇺 [ru](../ru/AUTO-COMBO.md) · 🇨🇳 [zh-CN](../zh-CN/AUTO-COMBO.md) · 🇯🇵 [ja](../ja/AUTO-COMBO.md) · 🇰🇷 [ko](../ko/AUTO-COMBO.md) · 🇸🇦 [ar](../ar/AUTO-COMBO.md) · 🇮🇳 [in](../in/AUTO-COMBO.md) · 🇹🇭 [th](../th/AUTO-COMBO.md) · 🇻🇳 [vi](../vi/AUTO-COMBO.md) · 🇮🇩 [id](../id/AUTO-COMBO.md) · 🇲🇾 [ms](../ms/AUTO-COMBO.md) · 🇳🇱 [nl](../nl/AUTO-COMBO.md) · 🇵🇱 [pl](../pl/AUTO-COMBO.md) · 🇸🇪 [sv](../sv/AUTO-COMBO.md) · 🇳🇴 [no](../no/AUTO-COMBO.md) · 🇩🇰 [da](../da/AUTO-COMBO.md) · 🇫🇮 [fi](../fi/AUTO-COMBO.md) · 🇵🇹 [pt](../pt/AUTO-COMBO.md) · 🇷🇴 [ro](../ro/AUTO-COMBO.md) · 🇭🇺 [hu](../hu/AUTO-COMBO.md) · 🇧🇬 [bg](../bg/AUTO-COMBO.md) · 🇸🇰 [sk](../sk/AUTO-COMBO.md) · 🇺🇦 [uk-UA](../uk-UA/AUTO-COMBO.md) · 🇮🇱 [he](../he/AUTO-COMBO.md) · 🇵🇭 [phi](../phi/AUTO-COMBO.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Auto-Combo Engine
|
||||
|
||||
> Self-managing model chains with adaptive scoring
|
||||
|
||||
## How It Works
|
||||
|
||||
The Auto-Combo Engine dynamically selects the best provider/model for each request using a **6-factor scoring function**:
|
||||
|
||||
| Factor | Weight | Description |
|
||||
| :--------- | :----- | :---------------------------------------------- |
|
||||
| Quota | 0.20 | Remaining capacity [0..1] |
|
||||
| Health | 0.25 | Circuit breaker: CLOSED=1.0, HALF=0.5, OPEN=0.0 |
|
||||
| CostInv | 0.20 | Inverse cost (cheaper = higher score) |
|
||||
| LatencyInv | 0.15 | Inverse p95 latency (faster = higher) |
|
||||
| TaskFit | 0.10 | Model × task type fitness score |
|
||||
| Stability | 0.10 | Low variance in latency/errors |
|
||||
|
||||
## Mode Packs
|
||||
|
||||
| Pack | Focus | Key Weight |
|
||||
| :---------------------- | :----------- | :--------------- |
|
||||
| 🚀 **Ship Fast** | Speed | latencyInv: 0.35 |
|
||||
| 💰 **Cost Saver** | Economy | costInv: 0.40 |
|
||||
| 🎯 **Quality First** | Best model | taskFit: 0.40 |
|
||||
| 📡 **Offline Friendly** | Availability | quota: 0.40 |
|
||||
|
||||
## Self-Healing
|
||||
|
||||
- **Temporary exclusion**: Score < 0.2 → excluded for 5 min (progressive backoff, max 30 min)
|
||||
- **Circuit breaker awareness**: OPEN → auto-excluded; HALF_OPEN → probe requests
|
||||
- **Incident mode**: >50% OPEN → disable exploration, maximize stability
|
||||
- **Cooldown recovery**: After exclusion, first request is a "probe" with reduced timeout
|
||||
|
||||
## Bandit Exploration
|
||||
|
||||
5% of requests (configurable) are routed to random providers for exploration. Disabled in incident mode.
|
||||
|
||||
## API
|
||||
|
||||
```bash
|
||||
# Create auto-combo
|
||||
curl -X POST http://localhost:20128/api/combos/auto \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"id":"my-auto","name":"Auto Coder","candidatePool":["anthropic","google","openai"],"modePack":"ship-fast"}'
|
||||
|
||||
# List auto-combos
|
||||
curl http://localhost:20128/api/combos/auto
|
||||
```
|
||||
|
||||
## Task Fitness
|
||||
|
||||
30+ models scored across 6 task types (`coding`, `review`, `planning`, `analysis`, `debugging`, `documentation`). Supports wildcard patterns (e.g., `*-coder` → high coding score).
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------ |
|
||||
| `open-sse/services/autoCombo/scoring.ts` | Scoring function & pool normalization |
|
||||
| `open-sse/services/autoCombo/taskFitness.ts` | Model × task fitness lookup |
|
||||
| `open-sse/services/autoCombo/engine.ts` | Selection logic, bandit, budget cap |
|
||||
| `open-sse/services/autoCombo/selfHealing.ts` | Exclusion, probes, incident mode |
|
||||
| `open-sse/services/autoCombo/modePacks.ts` | 4 weight profiles |
|
||||
| `src/app/api/combos/auto/route.ts` | REST API |
|
||||
@@ -0,0 +1,593 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/CODEBASE_DOCUMENTATION.md) · 🇪🇸 [es](../es/CODEBASE_DOCUMENTATION.md) · 🇫🇷 [fr](../fr/CODEBASE_DOCUMENTATION.md) · 🇩🇪 [de](../de/CODEBASE_DOCUMENTATION.md) · 🇮🇹 [it](../it/CODEBASE_DOCUMENTATION.md) · 🇷🇺 [ru](../ru/CODEBASE_DOCUMENTATION.md) · 🇨🇳 [zh-CN](../zh-CN/CODEBASE_DOCUMENTATION.md) · 🇯🇵 [ja](../ja/CODEBASE_DOCUMENTATION.md) · 🇰🇷 [ko](../ko/CODEBASE_DOCUMENTATION.md) · 🇸🇦 [ar](../ar/CODEBASE_DOCUMENTATION.md) · 🇮🇳 [in](../in/CODEBASE_DOCUMENTATION.md) · 🇹🇭 [th](../th/CODEBASE_DOCUMENTATION.md) · 🇻🇳 [vi](../vi/CODEBASE_DOCUMENTATION.md) · 🇮🇩 [id](../id/CODEBASE_DOCUMENTATION.md) · 🇲🇾 [ms](../ms/CODEBASE_DOCUMENTATION.md) · 🇳🇱 [nl](../nl/CODEBASE_DOCUMENTATION.md) · 🇵🇱 [pl](../pl/CODEBASE_DOCUMENTATION.md) · 🇸🇪 [sv](../sv/CODEBASE_DOCUMENTATION.md) · 🇳🇴 [no](../no/CODEBASE_DOCUMENTATION.md) · 🇩🇰 [da](../da/CODEBASE_DOCUMENTATION.md) · 🇫🇮 [fi](../fi/CODEBASE_DOCUMENTATION.md) · 🇵🇹 [pt](../pt/CODEBASE_DOCUMENTATION.md) · 🇷🇴 [ro](../ro/CODEBASE_DOCUMENTATION.md) · 🇭🇺 [hu](../hu/CODEBASE_DOCUMENTATION.md) · 🇧🇬 [bg](../bg/CODEBASE_DOCUMENTATION.md) · 🇸🇰 [sk](../sk/CODEBASE_DOCUMENTATION.md) · 🇺🇦 [uk-UA](../uk-UA/CODEBASE_DOCUMENTATION.md) · 🇮🇱 [he](../he/CODEBASE_DOCUMENTATION.md) · 🇵🇭 [phi](../phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
---
|
||||
|
||||
# omniroute — Codebase Documentation
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](CODEBASE_DOCUMENTATION.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/CODEBASE_DOCUMENTATION.md) | 🇪🇸 [Español](i18n/es/CODEBASE_DOCUMENTATION.md) | 🇫🇷 [Français](i18n/fr/CODEBASE_DOCUMENTATION.md) | 🇮🇹 [Italiano](i18n/it/CODEBASE_DOCUMENTATION.md) | 🇷🇺 [Русский](i18n/ru/CODEBASE_DOCUMENTATION.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/CODEBASE_DOCUMENTATION.md) | 🇩🇪 [Deutsch](i18n/de/CODEBASE_DOCUMENTATION.md) | 🇮🇳 [हिन्दी](i18n/in/CODEBASE_DOCUMENTATION.md) | 🇹🇭 [ไทย](i18n/th/CODEBASE_DOCUMENTATION.md) | 🇺🇦 [Українська](i18n/uk-UA/CODEBASE_DOCUMENTATION.md) | 🇸🇦 [العربية](i18n/ar/CODEBASE_DOCUMENTATION.md) | 🇯🇵 [日本語](i18n/ja/CODEBASE_DOCUMENTATION.md) | 🇻🇳 [Tiếng Việt](i18n/vi/CODEBASE_DOCUMENTATION.md) | 🇧🇬 [Български](i18n/bg/CODEBASE_DOCUMENTATION.md) | 🇩🇰 [Dansk](i18n/da/CODEBASE_DOCUMENTATION.md) | 🇫🇮 [Suomi](i18n/fi/CODEBASE_DOCUMENTATION.md) | 🇮🇱 [עברית](i18n/he/CODEBASE_DOCUMENTATION.md) | 🇭🇺 [Magyar](i18n/hu/CODEBASE_DOCUMENTATION.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/CODEBASE_DOCUMENTATION.md) | 🇰🇷 [한국어](i18n/ko/CODEBASE_DOCUMENTATION.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/CODEBASE_DOCUMENTATION.md) | 🇳🇱 [Nederlands](i18n/nl/CODEBASE_DOCUMENTATION.md) | 🇳🇴 [Norsk](i18n/no/CODEBASE_DOCUMENTATION.md) | 🇵🇹 [Português (Portugal)](i18n/pt/CODEBASE_DOCUMENTATION.md) | 🇷🇴 [Română](i18n/ro/CODEBASE_DOCUMENTATION.md) | 🇵🇱 [Polski](i18n/pl/CODEBASE_DOCUMENTATION.md) | 🇸🇰 [Slovenčina](i18n/sk/CODEBASE_DOCUMENTATION.md) | 🇸🇪 [Svenska](i18n/sv/CODEBASE_DOCUMENTATION.md) | 🇵🇭 [Filipino](i18n/phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
> A comprehensive, beginner-friendly guide to the **omniroute** multi-provider AI proxy router.
|
||||
|
||||
---
|
||||
|
||||
## 1. What Is omniroute?
|
||||
|
||||
omniroute is a **proxy router** that sits between AI clients (Claude CLI, Codex, Cursor IDE, etc.) and AI providers (Anthropic, Google, OpenAI, AWS, GitHub, etc.). It solves one big problem:
|
||||
|
||||
> **Different AI clients speak different "languages" (API formats), and different AI providers expect different "languages" too.** omniroute translates between them automatically.
|
||||
|
||||
Think of it like a universal translator at the United Nations — any delegate can speak any language, and the translator converts it for any other delegate.
|
||||
|
||||
---
|
||||
|
||||
## 2. Architecture Overview
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph Clients
|
||||
A[Claude CLI]
|
||||
B[Codex]
|
||||
C[Cursor IDE]
|
||||
D[OpenAI-compatible]
|
||||
end
|
||||
|
||||
subgraph omniroute
|
||||
E[Handler Layer]
|
||||
F[Translator Layer]
|
||||
G[Executor Layer]
|
||||
H[Services Layer]
|
||||
end
|
||||
|
||||
subgraph Providers
|
||||
I[Anthropic Claude]
|
||||
J[Google Gemini]
|
||||
K[OpenAI / Codex]
|
||||
L[GitHub Copilot]
|
||||
M[AWS Kiro]
|
||||
N[Antigravity]
|
||||
O[Cursor API]
|
||||
end
|
||||
|
||||
A --> E
|
||||
B --> E
|
||||
C --> E
|
||||
D --> E
|
||||
E --> F
|
||||
F --> G
|
||||
G --> I
|
||||
G --> J
|
||||
G --> K
|
||||
G --> L
|
||||
G --> M
|
||||
G --> N
|
||||
G --> O
|
||||
H -.-> E
|
||||
H -.-> G
|
||||
```
|
||||
|
||||
### Core Principle: Hub-and-Spoke Translation
|
||||
|
||||
All format translation passes through **OpenAI format as the hub**:
|
||||
|
||||
```
|
||||
Client Format → [OpenAI Hub] → Provider Format (request)
|
||||
Provider Format → [OpenAI Hub] → Client Format (response)
|
||||
```
|
||||
|
||||
This means you only need **N translators** (one per format) instead of **N²** (every pair).
|
||||
|
||||
---
|
||||
|
||||
## 3. Project Structure
|
||||
|
||||
```
|
||||
omniroute/
|
||||
├── open-sse/ ← Core proxy library (portable, framework-agnostic)
|
||||
│ ├── index.js ← Main entry point, exports everything
|
||||
│ ├── config/ ← Configuration & constants
|
||||
│ ├── executors/ ← Provider-specific request execution
|
||||
│ ├── handlers/ ← Request handling orchestration
|
||||
│ ├── services/ ← Business logic (auth, models, fallback, usage)
|
||||
│ ├── translator/ ← Format translation engine
|
||||
│ │ ├── request/ ← Request translators (8 files)
|
||||
│ │ ├── response/ ← Response translators (7 files)
|
||||
│ │ └── helpers/ ← Shared translation utilities (6 files)
|
||||
│ └── utils/ ← Utility functions
|
||||
├── src/ ← Application layer (Express/Worker runtime)
|
||||
│ ├── app/ ← Web UI, API routes, middleware
|
||||
│ ├── lib/ ← Database, auth, and shared library code
|
||||
│ ├── mitm/ ← Man-in-the-middle proxy utilities
|
||||
│ ├── models/ ← Database models
|
||||
│ ├── shared/ ← Shared utilities (wrappers around open-sse)
|
||||
│ ├── sse/ ← SSE endpoint handlers
|
||||
│ └── store/ ← State management
|
||||
├── data/ ← Runtime data (credentials, logs)
|
||||
│ └── provider-credentials.json (external credentials override, gitignored)
|
||||
└── tester/ ← Test utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Module-by-Module Breakdown
|
||||
|
||||
### 4.1 Config (`open-sse/config/`)
|
||||
|
||||
The **single source of truth** for all provider configuration.
|
||||
|
||||
| File | Purpose |
|
||||
| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `constants.ts` | `PROVIDERS` object with base URLs, OAuth credentials (defaults), headers, and default system prompts for every provider. Also defines `HTTP_STATUS`, `ERROR_TYPES`, `COOLDOWN_MS`, `BACKOFF_CONFIG`, and `SKIP_PATTERNS`. |
|
||||
| `credentialLoader.ts` | Loads external credentials from `data/provider-credentials.json` and merges them over the hardcoded defaults in `PROVIDERS`. Keeps secrets out of source control while maintaining backwards compatibility. |
|
||||
| `providerModels.ts` | Central model registry: maps provider aliases → model IDs. Functions like `getModels()`, `getProviderByAlias()`. |
|
||||
| `codexInstructions.ts` | System instructions injected into Codex requests (editing constraints, sandbox rules, approval policies). |
|
||||
| `defaultThinkingSignature.ts` | Default "thinking" signatures for Claude and Gemini models. |
|
||||
| `ollamaModels.ts` | Schema definition for local Ollama models (name, size, family, quantization). |
|
||||
|
||||
#### Credential Loading Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["App starts"] --> B["constants.ts defines PROVIDERS\nwith hardcoded defaults"]
|
||||
B --> C{"data/provider-credentials.json\nexists?"}
|
||||
C -->|Yes| D["credentialLoader reads JSON"]
|
||||
C -->|No| E["Use hardcoded defaults"]
|
||||
D --> F{"For each provider in JSON"}
|
||||
F --> G{"Provider exists\nin PROVIDERS?"}
|
||||
G -->|No| H["Log warning, skip"]
|
||||
G -->|Yes| I{"Value is object?"}
|
||||
I -->|No| J["Log warning, skip"]
|
||||
I -->|Yes| K["Merge clientId, clientSecret,\ntokenUrl, authUrl, refreshUrl"]
|
||||
K --> F
|
||||
H --> F
|
||||
J --> F
|
||||
F -->|Done| L["PROVIDERS ready with\nmerged credentials"]
|
||||
E --> L
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 Executors (`open-sse/executors/`)
|
||||
|
||||
Executors encapsulate **provider-specific logic** using the **Strategy Pattern**. Each executor overrides base methods as needed.
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BaseExecutor {
|
||||
+buildUrl(model, stream, options)
|
||||
+buildHeaders(credentials, stream, body)
|
||||
+transformRequest(body, model, stream, credentials)
|
||||
+execute(url, options)
|
||||
+shouldRetry(status, error)
|
||||
+refreshCredentials(credentials, log)
|
||||
}
|
||||
|
||||
class DefaultExecutor {
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class AntigravityExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+shouldRetry()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class CursorExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseResponse()
|
||||
+generateChecksum()
|
||||
}
|
||||
|
||||
class KiroExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseEventStream()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
BaseExecutor <|-- DefaultExecutor
|
||||
BaseExecutor <|-- AntigravityExecutor
|
||||
BaseExecutor <|-- CursorExecutor
|
||||
BaseExecutor <|-- KiroExecutor
|
||||
BaseExecutor <|-- CodexExecutor
|
||||
BaseExecutor <|-- GeminiCLIExecutor
|
||||
BaseExecutor <|-- GithubExecutor
|
||||
```
|
||||
|
||||
| Executor | Provider | Key Specializations |
|
||||
| ---------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base.ts` | — | Abstract base: URL building, headers, retry logic, credential refresh |
|
||||
| `default.ts` | Claude, Gemini, OpenAI, GLM, Kimi, MiniMax | Generic OAuth token refresh for standard providers |
|
||||
| `antigravity.ts` | Google Cloud Code | Project/session ID generation, multi-URL fallback, custom retry parsing from error messages ("reset after 2h7m23s") |
|
||||
| `cursor.ts` | Cursor IDE | **Most complex**: SHA-256 checksum auth, Protobuf request encoding, binary EventStream → SSE response parsing |
|
||||
| `codex.ts` | OpenAI Codex | Injects system instructions, manages thinking levels, removes unsupported parameters |
|
||||
| `gemini-cli.ts` | Google Gemini CLI | Custom URL building (`streamGenerateContent`), Google OAuth token refresh |
|
||||
| `github.ts` | GitHub Copilot | Dual token system (GitHub OAuth + Copilot token), VSCode header mimicking |
|
||||
| `kiro.ts` | AWS CodeWhisperer | AWS EventStream binary parsing, AMZN event frames, token estimation |
|
||||
| `index.ts` | — | Factory: maps provider name → executor class, with default fallback |
|
||||
|
||||
---
|
||||
|
||||
### 4.3 Handlers (`open-sse/handlers/`)
|
||||
|
||||
The **orchestration layer** — coordinates translation, execution, streaming, and error handling.
|
||||
|
||||
| File | Purpose |
|
||||
| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `chatCore.ts` | **Central orchestrator** (~600 lines). Handles the complete request lifecycle: format detection → translation → executor dispatch → streaming/non-streaming response → token refresh → error handling → usage logging. |
|
||||
| `responsesHandler.ts` | Adapter for OpenAI's Responses API: converts Responses format → Chat Completions → sends to `chatCore` → converts SSE back to Responses format. |
|
||||
| `embeddings.ts` | Embedding generation handler: resolves embedding model → provider, dispatches to provider API, returns OpenAI-compatible embedding response. Supports 6+ providers. |
|
||||
| `imageGeneration.ts` | Image generation handler: resolves image model → provider, supports OpenAI-compatible, Gemini-image (Antigravity), and fallback (Nebius) modes. Returns base64 or URL images. |
|
||||
|
||||
#### Request Lifecycle (chatCore.ts)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant chatCore
|
||||
participant Translator
|
||||
participant Executor
|
||||
participant Provider
|
||||
|
||||
Client->>chatCore: Request (any format)
|
||||
chatCore->>chatCore: Detect source format
|
||||
chatCore->>chatCore: Check bypass patterns
|
||||
chatCore->>chatCore: Resolve model & provider
|
||||
chatCore->>Translator: Translate request (source → OpenAI → target)
|
||||
chatCore->>Executor: Get executor for provider
|
||||
Executor->>Executor: Build URL, headers, transform request
|
||||
Executor->>Executor: Refresh credentials if needed
|
||||
Executor->>Provider: HTTP fetch (streaming or non-streaming)
|
||||
|
||||
alt Streaming
|
||||
Provider-->>chatCore: SSE stream
|
||||
chatCore->>chatCore: Pipe through SSE transform stream
|
||||
Note over chatCore: Transform stream translates<br/>each chunk: target → OpenAI → source
|
||||
chatCore-->>Client: Translated SSE stream
|
||||
else Non-streaming
|
||||
Provider-->>chatCore: JSON response
|
||||
chatCore->>Translator: Translate response
|
||||
chatCore-->>Client: Translated JSON
|
||||
end
|
||||
|
||||
alt Error (401, 429, 500...)
|
||||
chatCore->>Executor: Retry with credential refresh
|
||||
chatCore->>chatCore: Account fallback logic
|
||||
end
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.4 Services (`open-sse/services/`)
|
||||
|
||||
Business logic that supports the handlers and executors.
|
||||
|
||||
| File | Purpose |
|
||||
| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `provider.ts` | **Format detection** (`detectFormat`): analyzes request body structure to identify Claude/OpenAI/Gemini/Antigravity/Responses formats (includes `max_tokens` heuristic for Claude). Also: URL building, header building, thinking config normalization. Supports `openai-compatible-*` and `anthropic-compatible-*` dynamic providers. |
|
||||
| `model.ts` | Model string parsing (`claude/model-name` → `{provider: "claude", model: "model-name"}`), alias resolution with collision detection, input sanitization (rejects path traversal/control chars), and model info resolution with async alias getter support. |
|
||||
| `accountFallback.ts` | Rate-limit handling: exponential backoff (1s → 2s → 4s → max 2min), account cooldown management, error classification (which errors trigger fallback vs. not). |
|
||||
| `tokenRefresh.ts` | OAuth token refresh for **every provider**: Google (Gemini, Antigravity), Claude, Codex, Qwen, iFlow, GitHub (OAuth + Copilot dual-token), Kiro (AWS SSO OIDC + Social Auth). Includes in-flight promise deduplication cache and retry with exponential backoff. |
|
||||
| `combo.ts` | **Combo models**: chains of fallback models. If model A fails with a fallback-eligible error, try model B, then C, etc. Returns actual upstream status codes. |
|
||||
| `usage.ts` | Fetches quota/usage data from provider APIs (GitHub Copilot quotas, Antigravity model quotas, Codex rate limits, Kiro usage breakdowns, Claude settings). |
|
||||
| `accountSelector.ts` | Smart account selection with scoring algorithm: considers priority, health status, round-robin position, and cooldown state to pick the optimal account for each request. |
|
||||
| `contextManager.ts` | Request context lifecycle management: creates and tracks per-request context objects with metadata (request ID, timestamps, provider info) for debugging and logging. |
|
||||
| `ipFilter.ts` | IP-based access control: supports allowlist and blocklist modes. Validates client IP against configured rules before processing API requests. |
|
||||
| `sessionManager.ts` | Session tracking with client fingerprinting: tracks active sessions using hashed client identifiers, monitors request counts, and provides session metrics. |
|
||||
| `signatureCache.ts` | Request signature-based deduplication cache: prevents duplicate requests by caching recent request signatures and returning cached responses for identical requests within a time window. |
|
||||
| `systemPrompt.ts` | Global system prompt injection: prepends or appends a configurable system prompt to all requests, with per-provider compatibility handling. |
|
||||
| `thinkingBudget.ts` | Reasoning token budget management: supports passthrough, auto (strip thinking config), custom (fixed budget), and adaptive (complexity-scaled) modes for controlling thinking/reasoning tokens. |
|
||||
| `wildcardRouter.ts` | Wildcard model pattern routing: resolves wildcard patterns (e.g., `*/claude-*`) to concrete provider/model pairs based on availability and priority. |
|
||||
|
||||
#### Token Refresh Deduplication
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant R1 as Request 1
|
||||
participant R2 as Request 2
|
||||
participant Cache as refreshPromiseCache
|
||||
participant OAuth as OAuth Provider
|
||||
|
||||
R1->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: No in-flight promise
|
||||
Cache->>OAuth: Start refresh
|
||||
R2->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: Found in-flight promise
|
||||
Cache-->>R2: Return existing promise
|
||||
OAuth-->>Cache: New access token
|
||||
Cache-->>R1: New access token
|
||||
Cache-->>R2: Same access token (shared)
|
||||
Cache->>Cache: Delete cache entry
|
||||
```
|
||||
|
||||
#### Account Fallback State Machine
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Active
|
||||
Active --> Error: Request fails (401/429/500)
|
||||
Error --> Cooldown: Apply backoff
|
||||
Cooldown --> Active: Cooldown expires
|
||||
Active --> Active: Request succeeds (reset backoff)
|
||||
|
||||
state Error {
|
||||
[*] --> ClassifyError
|
||||
ClassifyError --> ShouldFallback: Rate limit / Auth / Transient
|
||||
ClassifyError --> NoFallback: 400 Bad Request
|
||||
}
|
||||
|
||||
state Cooldown {
|
||||
[*] --> ExponentialBackoff
|
||||
ExponentialBackoff: Level 0 = 1s
|
||||
ExponentialBackoff: Level 1 = 2s
|
||||
ExponentialBackoff: Level 2 = 4s
|
||||
ExponentialBackoff: Max = 2min
|
||||
}
|
||||
```
|
||||
|
||||
#### Combo Model Chain
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Request with\ncombo model"] --> B["Model A"]
|
||||
B -->|"2xx Success"| C["Return response"]
|
||||
B -->|"429/401/500"| D{"Fallback\neligible?"}
|
||||
D -->|Yes| E["Model B"]
|
||||
D -->|No| F["Return error"]
|
||||
E -->|"2xx Success"| C
|
||||
E -->|"429/401/500"| G{"Fallback\neligible?"}
|
||||
G -->|Yes| H["Model C"]
|
||||
G -->|No| F
|
||||
H -->|"2xx Success"| C
|
||||
H -->|"Fail"| I["All failed →\nReturn last status"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.5 Translator (`open-sse/translator/`)
|
||||
|
||||
The **format translation engine** using a self-registering plugin system.
|
||||
|
||||
#### Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "Request Translation"
|
||||
A["Claude → OpenAI"]
|
||||
B["Gemini → OpenAI"]
|
||||
C["Antigravity → OpenAI"]
|
||||
D["OpenAI Responses → OpenAI"]
|
||||
E["OpenAI → Claude"]
|
||||
F["OpenAI → Gemini"]
|
||||
G["OpenAI → Kiro"]
|
||||
H["OpenAI → Cursor"]
|
||||
end
|
||||
|
||||
subgraph "Response Translation"
|
||||
I["Claude → OpenAI"]
|
||||
J["Gemini → OpenAI"]
|
||||
K["Kiro → OpenAI"]
|
||||
L["Cursor → OpenAI"]
|
||||
M["OpenAI → Claude"]
|
||||
N["OpenAI → Antigravity"]
|
||||
O["OpenAI → Responses"]
|
||||
end
|
||||
```
|
||||
|
||||
| Directory | Files | Description |
|
||||
| ------------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `request/` | 8 translators | Convert request bodies between formats. Each file self-registers via `register(from, to, fn)` on import. |
|
||||
| `response/` | 7 translators | Convert streaming response chunks between formats. Handles SSE event types, thinking blocks, tool calls. |
|
||||
| `helpers/` | 6 helpers | Shared utilities: `claudeHelper` (system prompt extraction, thinking config), `geminiHelper` (parts/contents mapping), `openaiHelper` (format filtering), `toolCallHelper` (ID generation, missing response injection), `maxTokensHelper`, `responsesApiHelper`. |
|
||||
| `index.ts` | — | Translation engine: `translateRequest()`, `translateResponse()`, state management, registry. |
|
||||
| `formats.ts` | — | Format constants: `OPENAI`, `CLAUDE`, `GEMINI`, `ANTIGRAVITY`, `KIRO`, `CURSOR`, `OPENAI_RESPONSES`. |
|
||||
|
||||
#### Key Design: Self-Registering Plugins
|
||||
|
||||
```javascript
|
||||
// Each translator file calls register() on import:
|
||||
import { register } from "../index.js";
|
||||
register("claude", "openai", translateClaudeToOpenAI);
|
||||
|
||||
// The index.js imports all translator files, triggering registration:
|
||||
import "./request/claude-to-openai.js"; // ← self-registers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.6 Utils (`open-sse/utils/`)
|
||||
|
||||
| File | Purpose |
|
||||
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `error.ts` | Error response building (OpenAI-compatible format), upstream error parsing, Antigravity retry-time extraction from error messages, SSE error streaming. |
|
||||
| `stream.ts` | **SSE Transform Stream** — the core streaming pipeline. Two modes: `TRANSLATE` (full format translation) and `PASSTHROUGH` (normalize + extract usage). Handles chunk buffering, usage estimation, content length tracking. Per-stream encoder/decoder instances avoid shared state. |
|
||||
| `streamHelpers.ts` | Low-level SSE utilities: `parseSSELine` (whitespace-tolerant), `hasValuableContent` (filters empty chunks for OpenAI/Claude/Gemini), `fixInvalidId`, `formatSSE` (format-aware SSE serialization with `perf_metrics` cleanup). |
|
||||
| `usageTracking.ts` | Token usage extraction from any format (Claude/OpenAI/Gemini/Responses), estimation with separate tool/message char-per-token ratios, buffer addition (2000 tokens safety margin), format-specific field filtering, console logging with ANSI colors. |
|
||||
| `requestLogger.ts` | File-based request logging (opt-in via `ENABLE_REQUEST_LOGS=true`). Creates session folders with numbered files: `1_req_client.json` → `7_res_client.txt`. All I/O is async (fire-and-forget). Masks sensitive headers. |
|
||||
| `bypassHandler.ts` | Intercepts specific patterns from Claude CLI (title extraction, warmup, count) and returns fake responses without calling any provider. Supports both streaming and non-streaming. Intentionally limited to Claude CLI scope. |
|
||||
| `networkProxy.ts` | Resolves outbound proxy URL for a given provider with precedence: provider-specific config → global config → environment variables (`HTTPS_PROXY`/`HTTP_PROXY`/`ALL_PROXY`). Supports `NO_PROXY` exclusions. Caches config for 30s. |
|
||||
|
||||
#### SSE Streaming Pipeline
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["Provider SSE stream"] --> B["TextDecoder\n(per-stream instance)"]
|
||||
B --> C["Buffer lines\n(split on newline)"]
|
||||
C --> D["parseSSELine()\n(trim whitespace, parse JSON)"]
|
||||
D --> E{"Mode?"}
|
||||
E -->|TRANSLATE| F["translateResponse()\ntarget → OpenAI → source"]
|
||||
E -->|PASSTHROUGH| G["fixInvalidId()\nnormalize chunk"]
|
||||
F --> H["hasValuableContent()\nfilter empty chunks"]
|
||||
G --> H
|
||||
H -->|"Has content"| I["extractUsage()\ntrack token counts"]
|
||||
H -->|"Empty"| J["Skip chunk"]
|
||||
I --> K["formatSSE()\nserialize + clean perf_metrics"]
|
||||
K --> L["TextEncoder\n(per-stream instance)"]
|
||||
L --> M["Enqueue to\nclient stream"]
|
||||
|
||||
style A fill:#f9f,stroke:#333
|
||||
style M fill:#9f9,stroke:#333
|
||||
```
|
||||
|
||||
#### Request Logger Session Structure
|
||||
|
||||
```
|
||||
logs/
|
||||
└── claude_gemini_claude-sonnet_20260208_143045/
|
||||
├── 1_req_client.json ← Raw client request
|
||||
├── 2_req_source.json ← After initial conversion
|
||||
├── 3_req_openai.json ← OpenAI intermediate format
|
||||
├── 4_req_target.json ← Final target format
|
||||
├── 5_res_provider.txt ← Provider SSE chunks (streaming)
|
||||
├── 5_res_provider.json ← Provider response (non-streaming)
|
||||
├── 6_res_openai.txt ← OpenAI intermediate chunks
|
||||
├── 7_res_client.txt ← Client-facing SSE chunks
|
||||
└── 6_error.json ← Error details (if any)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.7 Application Layer (`src/`)
|
||||
|
||||
| Directory | Purpose |
|
||||
| ------------- | ---------------------------------------------------------------------- |
|
||||
| `src/app/` | Web UI, API routes, Express middleware, OAuth callback handlers |
|
||||
| `src/lib/` | Database access (`localDb.ts`, `usageDb.ts`), authentication, shared |
|
||||
| `src/mitm/` | Man-in-the-middle proxy utilities for intercepting provider traffic |
|
||||
| `src/models/` | Database model definitions |
|
||||
| `src/shared/` | Wrappers around open-sse functions (provider, stream, error, etc.) |
|
||||
| `src/sse/` | SSE endpoint handlers that wire the open-sse library to Express routes |
|
||||
| `src/store/` | Application state management |
|
||||
|
||||
#### Notable API Routes
|
||||
|
||||
| Route | Methods | Purpose |
|
||||
| --------------------------------------------- | --------------- | ------------------------------------------------------------------------------------- |
|
||||
| `/api/provider-models` | GET/POST/DELETE | CRUD for custom models per provider |
|
||||
| `/api/models/catalog` | GET | Aggregated catalog of all models (chat, embedding, image, custom) grouped by provider |
|
||||
| `/api/settings/proxy` | GET/PUT/DELETE | Hierarchical outbound proxy configuration (`global/providers/combos/keys`) |
|
||||
| `/api/settings/proxy/test` | POST | Validates proxy connectivity and returns public IP/latency |
|
||||
| `/v1/providers/[provider]/chat/completions` | POST | Dedicated per-provider chat completions with model validation |
|
||||
| `/v1/providers/[provider]/embeddings` | POST | Dedicated per-provider embeddings with model validation |
|
||||
| `/v1/providers/[provider]/images/generations` | POST | Dedicated per-provider image generation with model validation |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist management |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget configuration (passthrough/auto/custom/adaptive) |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt injection for all requests |
|
||||
| `/api/sessions` | GET | Active session tracking and metrics |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
|
||||
---
|
||||
|
||||
## 5. Key Design Patterns
|
||||
|
||||
### 5.1 Hub-and-Spoke Translation
|
||||
|
||||
All formats translate through **OpenAI format as the hub**. Adding a new provider only requires writing **one pair** of translators (to/from OpenAI), not N pairs.
|
||||
|
||||
### 5.2 Executor Strategy Pattern
|
||||
|
||||
Each provider has a dedicated executor class inheriting from `BaseExecutor`. The factory in `executors/index.ts` selects the right one at runtime.
|
||||
|
||||
### 5.3 Self-Registering Plugin System
|
||||
|
||||
Translator modules register themselves on import via `register()`. Adding a new translator is just creating a file and importing it.
|
||||
|
||||
### 5.4 Account Fallback with Exponential Backoff
|
||||
|
||||
When a provider returns 429/401/500, the system can switch to the next account, applying exponential cooldowns (1s → 2s → 4s → max 2min).
|
||||
|
||||
### 5.5 Combo Model Chains
|
||||
|
||||
A "combo" groups multiple `provider/model` strings. If the first fails, fallback to the next automatically.
|
||||
|
||||
### 5.6 Stateful Streaming Translation
|
||||
|
||||
Response translation maintains state across SSE chunks (thinking block tracking, tool call accumulation, content block indexing) via the `initState()` mechanism.
|
||||
|
||||
### 5.7 Usage Safety Buffer
|
||||
|
||||
A 2000-token buffer is added to reported usage to prevent clients from hitting context window limits due to overhead from system prompts and format translation.
|
||||
|
||||
---
|
||||
|
||||
## 6. Supported Formats
|
||||
|
||||
| Format | Direction | Identifier |
|
||||
| ----------------------- | --------------- | ------------------ |
|
||||
| OpenAI Chat Completions | source + target | `openai` |
|
||||
| OpenAI Responses API | source + target | `openai-responses` |
|
||||
| Anthropic Claude | source + target | `claude` |
|
||||
| Google Gemini | source + target | `gemini` |
|
||||
| Google Gemini CLI | target only | `gemini-cli` |
|
||||
| Antigravity | source + target | `antigravity` |
|
||||
| AWS Kiro | target only | `kiro` |
|
||||
| Cursor | target only | `cursor` |
|
||||
|
||||
---
|
||||
|
||||
## 7. Supported Providers
|
||||
|
||||
| Provider | Auth Method | Executor | Key Notes |
|
||||
| ------------------------ | ---------------------- | ----------- | --------------------------------------------- |
|
||||
| Anthropic Claude | API key or OAuth | Default | Uses `x-api-key` header |
|
||||
| Google Gemini | API key or OAuth | Default | Uses `x-goog-api-key` header |
|
||||
| Google Gemini CLI | OAuth | GeminiCLI | Uses `streamGenerateContent` endpoint |
|
||||
| Antigravity | OAuth | Antigravity | Multi-URL fallback, custom retry parsing |
|
||||
| OpenAI | API key | Default | Standard Bearer auth |
|
||||
| Codex | OAuth | Codex | Injects system instructions, manages thinking |
|
||||
| GitHub Copilot | OAuth + Copilot token | Github | Dual token, VSCode header mimicking |
|
||||
| Kiro (AWS) | AWS SSO OIDC or Social | Kiro | Binary EventStream parsing |
|
||||
| Cursor IDE | Checksum auth | Cursor | Protobuf encoding, SHA-256 checksums |
|
||||
| Qwen | OAuth | Default | Standard auth |
|
||||
| iFlow | OAuth (Basic + Bearer) | Default | Dual auth header |
|
||||
| OpenRouter | API key | Default | Standard Bearer auth |
|
||||
| GLM, Kimi, MiniMax | API key | Default | Claude-compatible, use `x-api-key` |
|
||||
| `openai-compatible-*` | API key | Default | Dynamic: any OpenAI-compatible endpoint |
|
||||
| `anthropic-compatible-*` | API key | Default | Dynamic: any Claude-compatible endpoint |
|
||||
|
||||
---
|
||||
|
||||
## 8. Data Flow Summary
|
||||
|
||||
### Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor\nbuildUrl + buildHeaders"]
|
||||
D --> E["fetch(providerURL)"]
|
||||
E --> F["createSSEStream()\nTRANSLATE mode"]
|
||||
F --> G["parseSSELine()"]
|
||||
G --> H["translateResponse()\ntarget → OpenAI → source"]
|
||||
H --> I["extractUsage()\n+ addBuffer"]
|
||||
I --> J["formatSSE()"]
|
||||
J --> K["Client receives\ntranslated SSE"]
|
||||
K --> L["logUsage()\nsaveRequestUsage()"]
|
||||
```
|
||||
|
||||
### Non-Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor.execute()"]
|
||||
D --> E["translateResponse()\ntarget → OpenAI → source"]
|
||||
E --> F["Return JSON\nresponse"]
|
||||
```
|
||||
|
||||
### Bypass Flow (Claude CLI)
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Claude CLI request"] --> B{"Match bypass\npattern?"}
|
||||
B -->|"Title/Warmup/Count"| C["Generate fake\nOpenAI response"]
|
||||
B -->|"No match"| D["Normal flow"]
|
||||
C --> E["Translate to\nsource format"]
|
||||
E --> F["Return without\ncalling provider"]
|
||||
```
|
||||
@@ -0,0 +1,148 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/FEATURES.md) · 🇪🇸 [es](../es/FEATURES.md) · 🇫🇷 [fr](../fr/FEATURES.md) · 🇩🇪 [de](../de/FEATURES.md) · 🇮🇹 [it](../it/FEATURES.md) · 🇷🇺 [ru](../ru/FEATURES.md) · 🇨🇳 [zh-CN](../zh-CN/FEATURES.md) · 🇯🇵 [ja](../ja/FEATURES.md) · 🇰🇷 [ko](../ko/FEATURES.md) · 🇸🇦 [ar](../ar/FEATURES.md) · 🇮🇳 [in](../in/FEATURES.md) · 🇹🇭 [th](../th/FEATURES.md) · 🇻🇳 [vi](../vi/FEATURES.md) · 🇮🇩 [id](../id/FEATURES.md) · 🇲🇾 [ms](../ms/FEATURES.md) · 🇳🇱 [nl](../nl/FEATURES.md) · 🇵🇱 [pl](../pl/FEATURES.md) · 🇸🇪 [sv](../sv/FEATURES.md) · 🇳🇴 [no](../no/FEATURES.md) · 🇩🇰 [da](../da/FEATURES.md) · 🇫🇮 [fi](../fi/FEATURES.md) · 🇵🇹 [pt](../pt/FEATURES.md) · 🇷🇴 [ro](../ro/FEATURES.md) · 🇭🇺 [hu](../hu/FEATURES.md) · 🇧🇬 [bg](../bg/FEATURES.md) · 🇸🇰 [sk](../sk/FEATURES.md) · 🇺🇦 [uk-UA](../uk-UA/FEATURES.md) · 🇮🇱 [he](../he/FEATURES.md) · 🇵🇭 [phi](../phi/FEATURES.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Dashboard Features Gallery
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](FEATURES.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/FEATURES.md) | 🇪🇸 [Español](i18n/es/FEATURES.md) | 🇫🇷 [Français](i18n/fr/FEATURES.md) | 🇮🇹 [Italiano](i18n/it/FEATURES.md) | 🇷🇺 [Русский](i18n/ru/FEATURES.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/FEATURES.md) | 🇩🇪 [Deutsch](i18n/de/FEATURES.md) | 🇮🇳 [हिन्दी](i18n/in/FEATURES.md) | 🇹🇭 [ไทย](i18n/th/FEATURES.md) | 🇺🇦 [Українська](i18n/uk-UA/FEATURES.md) | 🇸🇦 [العربية](i18n/ar/FEATURES.md) | 🇯🇵 [日本語](i18n/ja/FEATURES.md) | 🇻🇳 [Tiếng Việt](i18n/vi/FEATURES.md) | 🇧🇬 [Български](i18n/bg/FEATURES.md) | 🇩🇰 [Dansk](i18n/da/FEATURES.md) | 🇫🇮 [Suomi](i18n/fi/FEATURES.md) | 🇮🇱 [עברית](i18n/he/FEATURES.md) | 🇭🇺 [Magyar](i18n/hu/FEATURES.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/FEATURES.md) | 🇰🇷 [한국어](i18n/ko/FEATURES.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/FEATURES.md) | 🇳🇱 [Nederlands](i18n/nl/FEATURES.md) | 🇳🇴 [Norsk](i18n/no/FEATURES.md) | 🇵🇹 [Português (Portugal)](i18n/pt/FEATURES.md) | 🇷🇴 [Română](i18n/ro/FEATURES.md) | 🇵🇱 [Polski](i18n/pl/FEATURES.md) | 🇸🇰 [Slovenčina](i18n/sk/FEATURES.md) | 🇸🇪 [Svenska](i18n/sv/FEATURES.md) | 🇵🇭 [Filipino](i18n/phi/FEATURES.md)
|
||||
|
||||
Visual guide to every section of the OmniRoute dashboard.
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Providers
|
||||
|
||||
Manage AI provider connections: OAuth providers (Claude Code, Codex, Gemini CLI), API key providers (Groq, DeepSeek, OpenRouter), and free providers (iFlow, Qwen, Kiro).
|
||||
|
||||
- **Ollama Cloud** — Cloud-hosted Ollama models at `api.ollama.com` (free "Light usage" tier); use `ollamacloud/<model>` prefix
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
Create model routing combos with 6 strategies: priority, weighted, round-robin, random, least-used, and cost-optimized. Each combo chains multiple models with automatic fallback and includes quick templates and readiness checks.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 📊 Analytics
|
||||
|
||||
Comprehensive usage analytics with token consumption, cost estimates, activity heatmaps, weekly distribution charts, and per-provider breakdowns.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🏥 System Health
|
||||
|
||||
Real-time monitoring: uptime, memory, version, latency percentiles (p50/p95/p99), cache statistics, and provider circuit breaker states.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 Translator Playground
|
||||
|
||||
Four modes for debugging API translations: **Playground** (format converter), **Chat Tester** (live requests), **Test Bench** (batch tests), and **Live Monitor** (real-time stream).
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎮 Model Playground _(v2.0.9+)_
|
||||
|
||||
Test any model directly from the dashboard. Select provider, model, and endpoint, write prompts with Monaco Editor, stream responses in real-time, abort mid-stream, and view timing metrics.
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Themes _(v2.0.5+)_
|
||||
|
||||
Customizable color themes for the entire dashboard. Choose from 7 preset colors (Coral, Blue, Red, Green, Violet, Orange, Cyan) or create a custom theme by picking any hex color. Supports light, dark, and system mode.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Settings
|
||||
|
||||
Comprehensive settings panel with tabs:
|
||||
|
||||
- **General** — System storage, backup management (export/import database)
|
||||
- **Appearance** — Theme selector (dark/light/system), color theme presets and custom colors, health log visibility
|
||||
- **Security** — API endpoint protection, custom provider blocking, IP filtering, session info
|
||||
- **Routing** — Model aliases, background task degradation
|
||||
- **Resilience** — Rate limit persistence, circuit breaker tuning
|
||||
- **Advanced** — Configuration overrides
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Tools
|
||||
|
||||
One-click configuration for AI coding tools: Claude Code, Codex CLI, Gemini CLI, OpenClaw, Kilo Code, Antigravity, Cline, Continue, Cursor, and Factory Droid. Features automated config apply/reset, connection profiles, and model mapping.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🤖 CLI Agents _(v2.0.11+)_
|
||||
|
||||
Dashboard for discovering and managing CLI agents. Shows a grid of 14 built-in agents (Codex, Claude, Goose, Gemini CLI, OpenClaw, Aider, OpenCode, Cline, Qwen Code, ForgeCode, Amazon Q, Open Interpreter, Cursor CLI, Warp) with:
|
||||
|
||||
- **Installation status** — Installed / Not Found with version detection
|
||||
- **Protocol badges** — stdio, HTTP, etc.
|
||||
- **Custom agents** — Register any CLI tool via form (name, binary, version command, spawn args)
|
||||
- **CLI Fingerprint Matching** — Per-provider toggle to match native CLI request signatures, reducing ban risk while preserving proxy IP
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Media _(v2.0.3+)_
|
||||
|
||||
Generate images, videos, and music from the dashboard. Supports OpenAI, xAI, Together, Hyperbolic, SD WebUI, ComfyUI, AnimateDiff, Stable Audio Open, and MusicGen.
|
||||
|
||||
---
|
||||
|
||||
## 📝 Request Logs
|
||||
|
||||
Real-time request logging with filtering by provider, model, account, and API key. Shows status codes, token usage, latency, and response details.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🌐 API Endpoint
|
||||
|
||||
Your unified API endpoint with capability breakdown: Chat Completions, Responses API, Embeddings, Image Generation, Reranking, Audio Transcription, Text-to-Speech, Moderations, and registered API keys. Cloud proxy support for remote access.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔑 API Key Management
|
||||
|
||||
Create, scope, and revoke API keys. Each key can be restricted to specific models/providers with full access or read-only permissions. Visual key management with usage tracking.
|
||||
|
||||
---
|
||||
|
||||
## 📋 Audit Log
|
||||
|
||||
Administrative action tracking with filtering by action type, actor, target, IP address, and timestamp. Full security event history.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application
|
||||
|
||||
Native Electron desktop app for Windows, macOS, and Linux. Run OmniRoute as a standalone application with system tray integration, offline support, auto-update, and one-click install.
|
||||
|
||||
Key features:
|
||||
|
||||
- Server readiness polling (no blank screen on cold start)
|
||||
- System tray with port management
|
||||
- Content Security Policy
|
||||
- Single-instance lock
|
||||
- Auto-update on restart
|
||||
- Platform-conditional UI (macOS traffic lights, Windows/Linux default titlebar)
|
||||
|
||||
📖 See [`electron/README.md`](../electron/README.md) for full documentation.
|
||||
@@ -0,0 +1,87 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/MCP-SERVER.md) · 🇪🇸 [es](../es/MCP-SERVER.md) · 🇫🇷 [fr](../fr/MCP-SERVER.md) · 🇩🇪 [de](../de/MCP-SERVER.md) · 🇮🇹 [it](../it/MCP-SERVER.md) · 🇷🇺 [ru](../ru/MCP-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/MCP-SERVER.md) · 🇯🇵 [ja](../ja/MCP-SERVER.md) · 🇰🇷 [ko](../ko/MCP-SERVER.md) · 🇸🇦 [ar](../ar/MCP-SERVER.md) · 🇮🇳 [in](../in/MCP-SERVER.md) · 🇹🇭 [th](../th/MCP-SERVER.md) · 🇻🇳 [vi](../vi/MCP-SERVER.md) · 🇮🇩 [id](../id/MCP-SERVER.md) · 🇲🇾 [ms](../ms/MCP-SERVER.md) · 🇳🇱 [nl](../nl/MCP-SERVER.md) · 🇵🇱 [pl](../pl/MCP-SERVER.md) · 🇸🇪 [sv](../sv/MCP-SERVER.md) · 🇳🇴 [no](../no/MCP-SERVER.md) · 🇩🇰 [da](../da/MCP-SERVER.md) · 🇫🇮 [fi](../fi/MCP-SERVER.md) · 🇵🇹 [pt](../pt/MCP-SERVER.md) · 🇷🇴 [ro](../ro/MCP-SERVER.md) · 🇭🇺 [hu](../hu/MCP-SERVER.md) · 🇧🇬 [bg](../bg/MCP-SERVER.md) · 🇸🇰 [sk](../sk/MCP-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/MCP-SERVER.md) · 🇮🇱 [he](../he/MCP-SERVER.md) · 🇵🇭 [phi](../phi/MCP-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute MCP Server Documentation
|
||||
|
||||
> Model Context Protocol server with 16 intelligent tools
|
||||
|
||||
## Installation
|
||||
|
||||
OmniRoute MCP is built-in. Start it with:
|
||||
|
||||
```bash
|
||||
omniroute --mcp
|
||||
```
|
||||
|
||||
Or via the open-sse transport:
|
||||
|
||||
```bash
|
||||
# HTTP streamable transport (port 20130)
|
||||
omniroute --dev # MCP auto-starts on /mcp endpoint
|
||||
```
|
||||
|
||||
## IDE Configuration
|
||||
|
||||
See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot, and Claude Desktop setup.
|
||||
|
||||
---
|
||||
|
||||
## Essential Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :------------------------------ | :--------------------------------------- |
|
||||
| `omniroute_get_health` | Gateway health, circuit breakers, uptime |
|
||||
| `omniroute_list_combos` | All configured combos with models |
|
||||
| `omniroute_get_combo_metrics` | Performance metrics for a specific combo |
|
||||
| `omniroute_switch_combo` | Switch active combo by ID/name |
|
||||
| `omniroute_check_quota` | Quota status per provider or all |
|
||||
| `omniroute_route_request` | Send a chat completion through OmniRoute |
|
||||
| `omniroute_cost_report` | Cost analytics for a time period |
|
||||
| `omniroute_list_models_catalog` | Full model catalog with capabilities |
|
||||
|
||||
## Advanced Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :--------------------------------- | :---------------------------------------------- |
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
| `omniroute_get_session_snapshot` | Full session state: costs, tokens, errors |
|
||||
|
||||
## Authentication
|
||||
|
||||
MCP tools are authenticated via API key scopes. Each tool requires specific scopes:
|
||||
|
||||
| Scope | Tools |
|
||||
| :------------- | :----------------------------------------------- |
|
||||
| `read:health` | get_health, get_provider_metrics |
|
||||
| `read:combos` | list_combos, get_combo_metrics |
|
||||
| `write:combos` | switch_combo |
|
||||
| `read:quota` | check_quota |
|
||||
| `write:route` | route_request, simulate_route, test_combo |
|
||||
| `read:usage` | cost_report, get_session_snapshot, explain_route |
|
||||
| `write:config` | set_budget_guard, set_resilience_profile |
|
||||
| `read:models` | list_models_catalog, best_combo_for_task |
|
||||
|
||||
## Audit Logging
|
||||
|
||||
Every tool call is logged to `mcp_tool_audit` with:
|
||||
|
||||
- Tool name, arguments, result
|
||||
- Duration (ms), success/failure
|
||||
- API key hash, timestamp
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------------ |
|
||||
| `open-sse/mcp-server/server.ts` | MCP server creation + 16 tool registrations |
|
||||
| `open-sse/mcp-server/transport.ts` | Stdio + HTTP transport |
|
||||
| `open-sse/mcp-server/auth.ts` | API key + scope validation |
|
||||
| `open-sse/mcp-server/audit.ts` | Tool call audit logging |
|
||||
| `open-sse/mcp-server/tools/advancedTools.ts` | 8 advanced tool handlers |
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,37 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/RELEASE_CHECKLIST.md) · 🇪🇸 [es](../es/RELEASE_CHECKLIST.md) · 🇫🇷 [fr](../fr/RELEASE_CHECKLIST.md) · 🇩🇪 [de](../de/RELEASE_CHECKLIST.md) · 🇮🇹 [it](../it/RELEASE_CHECKLIST.md) · 🇷🇺 [ru](../ru/RELEASE_CHECKLIST.md) · 🇨🇳 [zh-CN](../zh-CN/RELEASE_CHECKLIST.md) · 🇯🇵 [ja](../ja/RELEASE_CHECKLIST.md) · 🇰🇷 [ko](../ko/RELEASE_CHECKLIST.md) · 🇸🇦 [ar](../ar/RELEASE_CHECKLIST.md) · 🇮🇳 [in](../in/RELEASE_CHECKLIST.md) · 🇹🇭 [th](../th/RELEASE_CHECKLIST.md) · 🇻🇳 [vi](../vi/RELEASE_CHECKLIST.md) · 🇮🇩 [id](../id/RELEASE_CHECKLIST.md) · 🇲🇾 [ms](../ms/RELEASE_CHECKLIST.md) · 🇳🇱 [nl](../nl/RELEASE_CHECKLIST.md) · 🇵🇱 [pl](../pl/RELEASE_CHECKLIST.md) · 🇸🇪 [sv](../sv/RELEASE_CHECKLIST.md) · 🇳🇴 [no](../no/RELEASE_CHECKLIST.md) · 🇩🇰 [da](../da/RELEASE_CHECKLIST.md) · 🇫🇮 [fi](../fi/RELEASE_CHECKLIST.md) · 🇵🇹 [pt](../pt/RELEASE_CHECKLIST.md) · 🇷🇴 [ro](../ro/RELEASE_CHECKLIST.md) · 🇭🇺 [hu](../hu/RELEASE_CHECKLIST.md) · 🇧🇬 [bg](../bg/RELEASE_CHECKLIST.md) · 🇸🇰 [sk](../sk/RELEASE_CHECKLIST.md) · 🇺🇦 [uk-UA](../uk-UA/RELEASE_CHECKLIST.md) · 🇮🇱 [he](../he/RELEASE_CHECKLIST.md) · 🇵🇭 [phi](../phi/RELEASE_CHECKLIST.md)
|
||||
|
||||
---
|
||||
|
||||
# Release Checklist
|
||||
|
||||
Use this checklist before tagging or publishing a new OmniRoute release.
|
||||
|
||||
## Version and Changelog
|
||||
|
||||
1. Bump `package.json` version (`x.y.z`) in the release branch.
|
||||
2. Move release notes from `## [Unreleased]` in `CHANGELOG.md` to a dated section:
|
||||
- `## [x.y.z] — YYYY-MM-DD`
|
||||
3. Keep `## [Unreleased]` as the first changelog section for upcoming work.
|
||||
4. Ensure the latest semver section in `CHANGELOG.md` equals `package.json` version.
|
||||
|
||||
## API Docs
|
||||
|
||||
1. Update `docs/openapi.yaml`:
|
||||
- `info.version` must equal `package.json` version.
|
||||
2. Validate endpoint examples if API contracts changed.
|
||||
|
||||
## Runtime Docs
|
||||
|
||||
1. Review `docs/ARCHITECTURE.md` for storage/runtime drift.
|
||||
2. Review `docs/TROUBLESHOOTING.md` for env var and operational drift.
|
||||
3. Update localized docs if source docs changed significantly.
|
||||
|
||||
## Automated Check
|
||||
|
||||
Run the sync guard locally before opening PR:
|
||||
|
||||
```bash
|
||||
npm run check:docs-sync
|
||||
```
|
||||
|
||||
CI also runs this check in `.github/workflows/ci.yml` (lint job).
|
||||
@@ -0,0 +1,258 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/TROUBLESHOOTING.md) · 🇪🇸 [es](../es/TROUBLESHOOTING.md) · 🇫🇷 [fr](../fr/TROUBLESHOOTING.md) · 🇩🇪 [de](../de/TROUBLESHOOTING.md) · 🇮🇹 [it](../it/TROUBLESHOOTING.md) · 🇷🇺 [ru](../ru/TROUBLESHOOTING.md) · 🇨🇳 [zh-CN](../zh-CN/TROUBLESHOOTING.md) · 🇯🇵 [ja](../ja/TROUBLESHOOTING.md) · 🇰🇷 [ko](../ko/TROUBLESHOOTING.md) · 🇸🇦 [ar](../ar/TROUBLESHOOTING.md) · 🇮🇳 [in](../in/TROUBLESHOOTING.md) · 🇹🇭 [th](../th/TROUBLESHOOTING.md) · 🇻🇳 [vi](../vi/TROUBLESHOOTING.md) · 🇮🇩 [id](../id/TROUBLESHOOTING.md) · 🇲🇾 [ms](../ms/TROUBLESHOOTING.md) · 🇳🇱 [nl](../nl/TROUBLESHOOTING.md) · 🇵🇱 [pl](../pl/TROUBLESHOOTING.md) · 🇸🇪 [sv](../sv/TROUBLESHOOTING.md) · 🇳🇴 [no](../no/TROUBLESHOOTING.md) · 🇩🇰 [da](../da/TROUBLESHOOTING.md) · 🇫🇮 [fi](../fi/TROUBLESHOOTING.md) · 🇵🇹 [pt](../pt/TROUBLESHOOTING.md) · 🇷🇴 [ro](../ro/TROUBLESHOOTING.md) · 🇭🇺 [hu](../hu/TROUBLESHOOTING.md) · 🇧🇬 [bg](../bg/TROUBLESHOOTING.md) · 🇸🇰 [sk](../sk/TROUBLESHOOTING.md) · 🇺🇦 [uk-UA](../uk-UA/TROUBLESHOOTING.md) · 🇮🇱 [he](../he/TROUBLESHOOTING.md) · 🇵🇭 [phi](../phi/TROUBLESHOOTING.md)
|
||||
|
||||
---
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](TROUBLESHOOTING.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/TROUBLESHOOTING.md) | 🇪🇸 [Español](i18n/es/TROUBLESHOOTING.md) | 🇫🇷 [Français](i18n/fr/TROUBLESHOOTING.md) | 🇮🇹 [Italiano](i18n/it/TROUBLESHOOTING.md) | 🇷🇺 [Русский](i18n/ru/TROUBLESHOOTING.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/TROUBLESHOOTING.md) | 🇩🇪 [Deutsch](i18n/de/TROUBLESHOOTING.md) | 🇮🇳 [हिन्दी](i18n/in/TROUBLESHOOTING.md) | 🇹🇭 [ไทย](i18n/th/TROUBLESHOOTING.md) | 🇺🇦 [Українська](i18n/uk-UA/TROUBLESHOOTING.md) | 🇸🇦 [العربية](i18n/ar/TROUBLESHOOTING.md) | 🇯🇵 [日本語](i18n/ja/TROUBLESHOOTING.md) | 🇻🇳 [Tiếng Việt](i18n/vi/TROUBLESHOOTING.md) | 🇧🇬 [Български](i18n/bg/TROUBLESHOOTING.md) | 🇩🇰 [Dansk](i18n/da/TROUBLESHOOTING.md) | 🇫🇮 [Suomi](i18n/fi/TROUBLESHOOTING.md) | 🇮🇱 [עברית](i18n/he/TROUBLESHOOTING.md) | 🇭🇺 [Magyar](i18n/hu/TROUBLESHOOTING.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/TROUBLESHOOTING.md) | 🇰🇷 [한국어](i18n/ko/TROUBLESHOOTING.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/TROUBLESHOOTING.md) | 🇳🇱 [Nederlands](i18n/nl/TROUBLESHOOTING.md) | 🇳🇴 [Norsk](i18n/no/TROUBLESHOOTING.md) | 🇵🇹 [Português (Portugal)](i18n/pt/TROUBLESHOOTING.md) | 🇷🇴 [Română](i18n/ro/TROUBLESHOOTING.md) | 🇵🇱 [Polski](i18n/pl/TROUBLESHOOTING.md) | 🇸🇰 [Slovenčina](i18n/sk/TROUBLESHOOTING.md) | 🇸🇪 [Svenska](i18n/sv/TROUBLESHOOTING.md) | 🇵🇭 [Filipino](i18n/phi/TROUBLESHOOTING.md)
|
||||
|
||||
Common problems and solutions for OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Quick Fixes
|
||||
|
||||
| Problem | Solution |
|
||||
| ----------------------------- | ------------------------------------------------------------------ |
|
||||
| First login not working | Set `INITIAL_PASSWORD` in `.env` (no hardcoded default) |
|
||||
| Dashboard opens on wrong port | Set `PORT=20128` and `NEXT_PUBLIC_BASE_URL=http://localhost:20128` |
|
||||
| No request logs under `logs/` | Set `ENABLE_REQUEST_LOGS=true` |
|
||||
| EACCES: permission denied | Set `DATA_DIR=/path/to/writable/dir` to override `~/.omniroute` |
|
||||
| Routing strategy not saving | Update to v1.4.11+ (Zod schema fix for settings persistence) |
|
||||
|
||||
---
|
||||
|
||||
## Provider Issues
|
||||
|
||||
### "Language model did not provide messages"
|
||||
|
||||
**Cause:** Provider quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Check dashboard quota tracker
|
||||
2. Use a combo with fallback tiers
|
||||
3. Switch to cheaper/free tier
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
**Cause:** Subscription quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
- Add fallback: `cc/claude-opus-4-6 → glm/glm-4.7 → if/kimi-k2-thinking`
|
||||
- Use GLM/MiniMax as cheap backup
|
||||
|
||||
### OAuth Token Expired
|
||||
|
||||
OmniRoute auto-refreshes tokens. If issues persist:
|
||||
|
||||
1. Dashboard → Provider → Reconnect
|
||||
2. Delete and re-add the provider connection
|
||||
|
||||
---
|
||||
|
||||
## Cloud Issues
|
||||
|
||||
### Cloud Sync Errors
|
||||
|
||||
1. Verify `BASE_URL` points to your running instance (e.g., `http://localhost:20128`)
|
||||
2. Verify `CLOUD_URL` points to your cloud endpoint (e.g., `https://omniroute.dev`)
|
||||
3. Keep `NEXT_PUBLIC_*` values aligned with server-side values
|
||||
|
||||
### Cloud `stream=false` Returns 500
|
||||
|
||||
**Symptom:** `Unexpected token 'd'...` on cloud endpoint for non-streaming calls.
|
||||
|
||||
**Cause:** Upstream returns SSE payload while client expects JSON.
|
||||
|
||||
**Workaround:** Use `stream=true` for cloud direct calls. Local runtime includes SSE→JSON fallback.
|
||||
|
||||
### Cloud Says Connected but "Invalid API key"
|
||||
|
||||
1. Create a fresh key from local dashboard (`/api/keys`)
|
||||
2. Run cloud sync: Enable Cloud → Sync Now
|
||||
3. Old/non-synced keys can still return `401` on cloud
|
||||
|
||||
---
|
||||
|
||||
## Docker Issues
|
||||
|
||||
### CLI Tool Shows Not Installed
|
||||
|
||||
1. Check runtime fields: `curl http://localhost:20128/api/cli-tools/runtime/codex | jq`
|
||||
2. For portable mode: use image target `runner-cli` (bundled CLIs)
|
||||
3. For host mount mode: set `CLI_EXTRA_PATHS` and mount host bin directory as read-only
|
||||
4. If `installed=true` and `runnable=false`: binary was found but failed healthcheck
|
||||
|
||||
### Quick Runtime Validation
|
||||
|
||||
```bash
|
||||
curl -s http://localhost:20128/api/cli-tools/codex-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/claude-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/openclaw-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cost Issues
|
||||
|
||||
### High Costs
|
||||
|
||||
1. Check usage stats in Dashboard → Usage
|
||||
2. Switch primary model to GLM/MiniMax
|
||||
3. Use free tier (Gemini CLI, iFlow) for non-critical tasks
|
||||
4. Set cost budgets per API key: Dashboard → API Keys → Budget
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
### Enable Request Logs
|
||||
|
||||
Set `ENABLE_REQUEST_LOGS=true` in your `.env` file. Logs appear under `logs/` directory.
|
||||
|
||||
### Check Provider Health
|
||||
|
||||
```bash
|
||||
# Health dashboard
|
||||
http://localhost:20128/dashboard/health
|
||||
|
||||
# API health check
|
||||
curl http://localhost:20128/api/monitoring/health
|
||||
```
|
||||
|
||||
### Runtime Storage
|
||||
|
||||
- Main state: `${DATA_DIR}/storage.sqlite` (providers, combos, aliases, keys, settings)
|
||||
- Usage: SQLite tables in `storage.sqlite` (`usage_history`, `call_logs`, `proxy_logs`) + optional `${DATA_DIR}/log.txt` and `${DATA_DIR}/call_logs/`
|
||||
- Request logs: `<repo>/logs/...` (when `ENABLE_REQUEST_LOGS=true`)
|
||||
|
||||
---
|
||||
|
||||
## Circuit Breaker Issues
|
||||
|
||||
### Provider stuck in OPEN state
|
||||
|
||||
When a provider's circuit breaker is OPEN, requests are blocked until the cooldown expires.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Go to **Dashboard → Settings → Resilience**
|
||||
2. Check the circuit breaker card for the affected provider
|
||||
3. Click **Reset All** to clear all breakers, or wait for the cooldown to expire
|
||||
4. Verify the provider is actually available before resetting
|
||||
|
||||
### Provider keeps tripping the circuit breaker
|
||||
|
||||
If a provider repeatedly enters OPEN state:
|
||||
|
||||
1. Check **Dashboard → Health → Provider Health** for the failure pattern
|
||||
2. Go to **Settings → Resilience → Provider Profiles** and increase the failure threshold
|
||||
3. Check if the provider has changed API limits or requires re-authentication
|
||||
4. Review latency telemetry — high latency may cause timeout-based failures
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription Issues
|
||||
|
||||
### "Unsupported model" error
|
||||
|
||||
- Ensure you're using the correct prefix: `deepgram/nova-3` or `assemblyai/best`
|
||||
- Verify the provider is connected in **Dashboard → Providers**
|
||||
|
||||
### Transcription returns empty or fails
|
||||
|
||||
- Check supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`
|
||||
- Verify file size is within provider limits (typically < 25MB)
|
||||
- Check provider API key validity in the provider card
|
||||
|
||||
---
|
||||
|
||||
## Translator Debugging
|
||||
|
||||
Use **Dashboard → Translator** to debug format translation issues:
|
||||
|
||||
| Mode | When to Use |
|
||||
| ---------------- | -------------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Compare input/output formats side by side — paste a failing request to see how it translates |
|
||||
| **Chat Tester** | Send live messages and inspect the full request/response payload including headers |
|
||||
| **Test Bench** | Run batch tests across format combinations to find which translations are broken |
|
||||
| **Live Monitor** | Watch real-time request flow to catch intermittent translation issues |
|
||||
|
||||
### Common format issues
|
||||
|
||||
- **Thinking tags not appearing** — Check if the target provider supports thinking and the thinking budget setting
|
||||
- **Tool calls dropping** — Some format translations may strip unsupported fields; verify in Playground mode
|
||||
- **System prompt missing** — Claude and Gemini handle system prompts differently; check translation output
|
||||
- **SDK returns raw string instead of object** — Fixed in v1.1.0: response sanitizer now strips non-standard fields (`x_groq`, `usage_breakdown`, etc.) that cause OpenAI SDK Pydantic validation failures
|
||||
- **GLM/ERNIE rejects `system` role** — Fixed in v1.1.0: role normalizer automatically merges system messages into user messages for incompatible models
|
||||
- **`developer` role not recognized** — Fixed in v1.1.0: automatically converted to `system` for non-OpenAI providers
|
||||
- **`json_schema` not working with Gemini** — Fixed in v1.1.0: `response_format` is now converted to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
---
|
||||
|
||||
## Resilience Settings
|
||||
|
||||
### Auto rate-limit not triggering
|
||||
|
||||
- Auto rate-limit only applies to API key providers (not OAuth/subscription)
|
||||
- Verify **Settings → Resilience → Provider Profiles** has auto-rate-limit enabled
|
||||
- Check if the provider returns `429` status codes or `Retry-After` headers
|
||||
|
||||
### Tuning exponential backoff
|
||||
|
||||
Provider profiles support these settings:
|
||||
|
||||
- **Base delay** — Initial wait time after first failure (default: 1s)
|
||||
- **Max delay** — Maximum wait time cap (default: 30s)
|
||||
- **Multiplier** — How much to increase delay per consecutive failure (default: 2x)
|
||||
|
||||
### Anti-thundering herd
|
||||
|
||||
When many concurrent requests hit a rate-limited provider, OmniRoute uses mutex + auto rate-limiting to serialize requests and prevent cascading failures. This is automatic for API key providers.
|
||||
|
||||
---
|
||||
|
||||
## Optional RAG / LLM failure taxonomy (16 problems)
|
||||
|
||||
Some OmniRoute users place the gateway in front of RAG or agent stacks. In those setups it is common to see a strange pattern: OmniRoute looks healthy (providers up, routing profiles ok, no rate limit alerts) but the final answer is still wrong.
|
||||
|
||||
In practice these incidents usually come from the downstream RAG pipeline, not from the gateway itself.
|
||||
|
||||
If you want a shared vocabulary to describe those failures you can use the WFGY ProblemMap, an external MIT license text resource that defines sixteen recurring RAG / LLM failure patterns. At a high level it covers:
|
||||
|
||||
- retrieval drift and broken context boundaries
|
||||
- empty or stale indexes and vector stores
|
||||
- embedding versus semantic mismatch
|
||||
- prompt assembly and context window issues
|
||||
- logic collapse and overconfident answers
|
||||
- long chain and agent coordination failures
|
||||
- multi agent memory and role drift
|
||||
- deployment and bootstrap ordering problems
|
||||
|
||||
The idea is simple:
|
||||
|
||||
1. When you investigate a bad response, capture:
|
||||
- user task and request
|
||||
- route or provider combo in OmniRoute
|
||||
- any RAG context used downstream (retrieved documents, tool calls, etc)
|
||||
2. Map the incident to one or two WFGY ProblemMap numbers (`No.1` … `No.16`).
|
||||
3. Store the number in your own dashboard, runbook, or incident tracker next to the OmniRoute logs.
|
||||
4. Use the corresponding WFGY page to decide whether you need to change your RAG stack, retriever, or routing strategy.
|
||||
|
||||
Full text and concrete recipes live here (MIT license, text only):
|
||||
|
||||
[WFGY ProblemMap README](https://github.com/onestardao/WFGY/blob/main/ProblemMap/README.md)
|
||||
|
||||
You can ignore this section if you do not run RAG or agent pipelines behind OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Still Stuck?
|
||||
|
||||
- **GitHub Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **Architecture**: See [`docs/ARCHITECTURE.md`](ARCHITECTURE.md) for internal details
|
||||
- **API Reference**: See [`docs/API_REFERENCE.md`](API_REFERENCE.md) for all endpoints
|
||||
- **Health Dashboard**: Check **Dashboard → Health** for real-time system status
|
||||
- **Translator**: Use **Dashboard → Translator** to debug format issues
|
||||
@@ -0,0 +1,813 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/USER_GUIDE.md) · 🇪🇸 [es](../es/USER_GUIDE.md) · 🇫🇷 [fr](../fr/USER_GUIDE.md) · 🇩🇪 [de](../de/USER_GUIDE.md) · 🇮🇹 [it](../it/USER_GUIDE.md) · 🇷🇺 [ru](../ru/USER_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/USER_GUIDE.md) · 🇯🇵 [ja](../ja/USER_GUIDE.md) · 🇰🇷 [ko](../ko/USER_GUIDE.md) · 🇸🇦 [ar](../ar/USER_GUIDE.md) · 🇮🇳 [in](../in/USER_GUIDE.md) · 🇹🇭 [th](../th/USER_GUIDE.md) · 🇻🇳 [vi](../vi/USER_GUIDE.md) · 🇮🇩 [id](../id/USER_GUIDE.md) · 🇲🇾 [ms](../ms/USER_GUIDE.md) · 🇳🇱 [nl](../nl/USER_GUIDE.md) · 🇵🇱 [pl](../pl/USER_GUIDE.md) · 🇸🇪 [sv](../sv/USER_GUIDE.md) · 🇳🇴 [no](../no/USER_GUIDE.md) · 🇩🇰 [da](../da/USER_GUIDE.md) · 🇫🇮 [fi](../fi/USER_GUIDE.md) · 🇵🇹 [pt](../pt/USER_GUIDE.md) · 🇷🇴 [ro](../ro/USER_GUIDE.md) · 🇭🇺 [hu](../hu/USER_GUIDE.md) · 🇧🇬 [bg](../bg/USER_GUIDE.md) · 🇸🇰 [sk](../sk/USER_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/USER_GUIDE.md) · 🇮🇱 [he](../he/USER_GUIDE.md) · 🇵🇭 [phi](../phi/USER_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# User Guide
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](USER_GUIDE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/USER_GUIDE.md) | 🇪🇸 [Español](i18n/es/USER_GUIDE.md) | 🇫🇷 [Français](i18n/fr/USER_GUIDE.md) | 🇮🇹 [Italiano](i18n/it/USER_GUIDE.md) | 🇷🇺 [Русский](i18n/ru/USER_GUIDE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/USER_GUIDE.md) | 🇩🇪 [Deutsch](i18n/de/USER_GUIDE.md) | 🇮🇳 [हिन्दी](i18n/in/USER_GUIDE.md) | 🇹🇭 [ไทย](i18n/th/USER_GUIDE.md) | 🇺🇦 [Українська](i18n/uk-UA/USER_GUIDE.md) | 🇸🇦 [العربية](i18n/ar/USER_GUIDE.md) | 🇯🇵 [日本語](i18n/ja/USER_GUIDE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/USER_GUIDE.md) | 🇧🇬 [Български](i18n/bg/USER_GUIDE.md) | 🇩🇰 [Dansk](i18n/da/USER_GUIDE.md) | 🇫🇮 [Suomi](i18n/fi/USER_GUIDE.md) | 🇮🇱 [עברית](i18n/he/USER_GUIDE.md) | 🇭🇺 [Magyar](i18n/hu/USER_GUIDE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/USER_GUIDE.md) | 🇰🇷 [한국어](i18n/ko/USER_GUIDE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/USER_GUIDE.md) | 🇳🇱 [Nederlands](i18n/nl/USER_GUIDE.md) | 🇳🇴 [Norsk](i18n/no/USER_GUIDE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/USER_GUIDE.md) | 🇷🇴 [Română](i18n/ro/USER_GUIDE.md) | 🇵🇱 [Polski](i18n/pl/USER_GUIDE.md) | 🇸🇰 [Slovenčina](i18n/sk/USER_GUIDE.md) | 🇸🇪 [Svenska](i18n/sv/USER_GUIDE.md) | 🇵🇭 [Filipino](i18n/phi/USER_GUIDE.md)
|
||||
|
||||
Complete guide for configuring providers, creating combos, integrating CLI tools, and deploying OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Pricing at a Glance](#-pricing-at-a-glance)
|
||||
- [Use Cases](#-use-cases)
|
||||
- [Provider Setup](#-provider-setup)
|
||||
- [CLI Integration](#-cli-integration)
|
||||
- [Deployment](#-deployment)
|
||||
- [Available Models](#-available-models)
|
||||
- [Advanced Features](#-advanced-features)
|
||||
|
||||
---
|
||||
|
||||
## 💰 Pricing at a Glance
|
||||
|
||||
| Tier | Provider | Cost | Quota Reset | Best For |
|
||||
| ------------------- | ----------------- | ----------- | ---------------- | -------------------- |
|
||||
| **💳 SUBSCRIPTION** | Claude Code (Pro) | $20/mo | 5h + weekly | Already subscribed |
|
||||
| | Codex (Plus/Pro) | $20-200/mo | 5h + weekly | OpenAI users |
|
||||
| | Gemini CLI | **FREE** | 180K/mo + 1K/day | Everyone! |
|
||||
| | GitHub Copilot | $10-19/mo | Monthly | GitHub users |
|
||||
| **🔑 API KEY** | DeepSeek | Pay per use | None | Cheap reasoning |
|
||||
| | Groq | Pay per use | None | Ultra-fast inference |
|
||||
| | xAI (Grok) | Pay per use | None | Grok 4 reasoning |
|
||||
| | Mistral | Pay per use | None | EU-hosted models |
|
||||
| | Perplexity | Pay per use | None | Search-augmented |
|
||||
| | Together AI | Pay per use | None | Open-source models |
|
||||
| | Fireworks AI | Pay per use | None | Fast FLUX images |
|
||||
| | Cerebras | Pay per use | None | Wafer-scale speed |
|
||||
| | Cohere | Pay per use | None | Command R+ RAG |
|
||||
| | NVIDIA NIM | Pay per use | None | Enterprise models |
|
||||
| **💰 CHEAP** | GLM-4.7 | $0.6/1M | Daily 10AM | Budget backup |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5-hour rolling | Cheapest option |
|
||||
| | Kimi K2 | $9/mo flat | 10M tokens/mo | Predictable cost |
|
||||
| **🆓 FREE** | iFlow | $0 | Unlimited | 8 models free |
|
||||
| | Qwen | $0 | Unlimited | 3 models free |
|
||||
| | Kiro | $0 | Unlimited | Claude free |
|
||||
|
||||
**💡 Pro Tip:** Start with Gemini CLI (180K free/month) + iFlow (unlimited free) combo = $0 cost!
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Use Cases
|
||||
|
||||
### Case 1: "I have Claude Pro subscription"
|
||||
|
||||
**Problem:** Quota expires unused, rate limits during heavy coding
|
||||
|
||||
```
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (use subscription fully)
|
||||
2. glm/glm-4.7 (cheap backup when quota out)
|
||||
3. if/kimi-k2-thinking (free emergency fallback)
|
||||
|
||||
Monthly cost: $20 (subscription) + ~$5 (backup) = $25 total
|
||||
vs. $20 + hitting limits = frustration
|
||||
```
|
||||
|
||||
### Case 2: "I want zero cost"
|
||||
|
||||
**Problem:** Can't afford subscriptions, need reliable AI coding
|
||||
|
||||
```
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited free)
|
||||
3. qw/qwen3-coder-plus (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Quality: Production-ready models
|
||||
```
|
||||
|
||||
### Case 3: "I need 24/7 coding, no interruptions"
|
||||
|
||||
**Problem:** Deadlines, can't afford downtime
|
||||
|
||||
```
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6 (best quality)
|
||||
2. cx/gpt-5.2-codex (second subscription)
|
||||
3. glm/glm-4.7 (cheap, resets daily)
|
||||
4. minimax/MiniMax-M2.1 (cheapest, 5h reset)
|
||||
5. if/kimi-k2-thinking (free unlimited)
|
||||
|
||||
Result: 5 layers of fallback = zero downtime
|
||||
Monthly cost: $20-200 (subscriptions) + $10-20 (backup)
|
||||
```
|
||||
|
||||
### Case 4: "I want FREE AI in OpenClaw"
|
||||
|
||||
**Problem:** Need AI assistant in messaging apps, completely free
|
||||
|
||||
```
|
||||
Combo: "openclaw-free"
|
||||
1. if/glm-4.7 (unlimited free)
|
||||
2. if/minimax-m2.1 (unlimited free)
|
||||
3. if/kimi-k2-thinking (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Access via: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📖 Provider Setup
|
||||
|
||||
### 🔐 Subscription Providers
|
||||
|
||||
#### Claude Code (Pro/Max)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Claude Code
|
||||
→ OAuth login → Auto token refresh
|
||||
→ 5-hour + weekly quota tracking
|
||||
|
||||
Models:
|
||||
cc/claude-opus-4-6
|
||||
cc/claude-sonnet-4-5-20250929
|
||||
cc/claude-haiku-4-5-20251001
|
||||
```
|
||||
|
||||
**Pro Tip:** Use Opus for complex tasks, Sonnet for speed. OmniRoute tracks quota per model!
|
||||
|
||||
#### OpenAI Codex (Plus/Pro)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Codex
|
||||
→ OAuth login (port 1455)
|
||||
→ 5-hour + weekly reset
|
||||
|
||||
Models:
|
||||
cx/gpt-5.2-codex
|
||||
cx/gpt-5.1-codex-max
|
||||
```
|
||||
|
||||
#### Gemini CLI (FREE 180K/month!)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Gemini CLI
|
||||
→ Google OAuth
|
||||
→ 180K completions/month + 1K/day
|
||||
|
||||
Models:
|
||||
gc/gemini-3-flash-preview
|
||||
gc/gemini-2.5-pro
|
||||
```
|
||||
|
||||
**Best Value:** Huge free tier! Use this before paid tiers.
|
||||
|
||||
#### GitHub Copilot
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect GitHub
|
||||
→ OAuth via GitHub
|
||||
→ Monthly reset (1st of month)
|
||||
|
||||
Models:
|
||||
gh/gpt-5
|
||||
gh/claude-4.5-sonnet
|
||||
gh/gemini-3-pro
|
||||
```
|
||||
|
||||
### 💰 Cheap Providers
|
||||
|
||||
#### GLM-4.7 (Daily reset, $0.6/1M)
|
||||
|
||||
1. Sign up: [Zhipu AI](https://open.bigmodel.cn/)
|
||||
2. Get API key from Coding Plan
|
||||
3. Dashboard → Add API Key: Provider: `glm`, API Key: `your-key`
|
||||
|
||||
**Use:** `glm/glm-4.7` — **Pro Tip:** Coding Plan offers 3× quota at 1/7 cost! Reset daily 10:00 AM.
|
||||
|
||||
#### MiniMax M2.1 (5h reset, $0.20/1M)
|
||||
|
||||
1. Sign up: [MiniMax](https://www.minimax.io/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `minimax/MiniMax-M2.1` — **Pro Tip:** Cheapest option for long context (1M tokens)!
|
||||
|
||||
#### Kimi K2 ($9/month flat)
|
||||
|
||||
1. Subscribe: [Moonshot AI](https://platform.moonshot.ai/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `kimi/kimi-latest` — **Pro Tip:** Fixed $9/month for 10M tokens = $0.90/1M effective cost!
|
||||
|
||||
### 🆓 FREE Providers
|
||||
|
||||
#### iFlow (8 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect iFlow → OAuth login → Unlimited usage
|
||||
|
||||
Models: if/kimi-k2-thinking, if/qwen3-coder-plus, if/glm-4.7, if/minimax-m2, if/deepseek-r1
|
||||
```
|
||||
|
||||
#### Qwen (3 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Qwen → Device code auth → Unlimited usage
|
||||
|
||||
Models: qw/qwen3-coder-plus, qw/qwen3-coder-flash
|
||||
```
|
||||
|
||||
#### Kiro (Claude FREE)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Kiro → AWS Builder ID or Google/GitHub → Unlimited
|
||||
|
||||
Models: kr/claude-sonnet-4.5, kr/claude-haiku-4.5
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
### Example 1: Maximize Subscription → Cheap Backup
|
||||
|
||||
```
|
||||
Dashboard → Combos → Create New
|
||||
|
||||
Name: premium-coding
|
||||
Models:
|
||||
1. cc/claude-opus-4-6 (Subscription primary)
|
||||
2. glm/glm-4.7 (Cheap backup, $0.6/1M)
|
||||
3. minimax/MiniMax-M2.1 (Cheapest fallback, $0.20/1M)
|
||||
|
||||
Use in CLI: premium-coding
|
||||
```
|
||||
|
||||
### Example 2: Free-Only (Zero Cost)
|
||||
|
||||
```
|
||||
Name: free-combo
|
||||
Models:
|
||||
1. gc/gemini-3-flash-preview (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited)
|
||||
3. qw/qwen3-coder-plus (unlimited)
|
||||
|
||||
Cost: $0 forever!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Integration
|
||||
|
||||
### Cursor IDE
|
||||
|
||||
```
|
||||
Settings → Models → Advanced:
|
||||
OpenAI API Base URL: http://localhost:20128/v1
|
||||
OpenAI API Key: [from omniroute dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
Edit `~/.claude/config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"anthropic_api_base": "http://localhost:20128/v1",
|
||||
"anthropic_api_key": "your-omniroute-api-key"
|
||||
}
|
||||
```
|
||||
|
||||
### Codex CLI
|
||||
|
||||
```bash
|
||||
export OPENAI_BASE_URL="http://localhost:20128"
|
||||
export OPENAI_API_KEY="your-omniroute-api-key"
|
||||
codex "your prompt"
|
||||
```
|
||||
|
||||
### OpenClaw
|
||||
|
||||
Edit `~/.openclaw/openclaw.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": { "primary": "omniroute/if/glm-4.7" }
|
||||
}
|
||||
},
|
||||
"models": {
|
||||
"providers": {
|
||||
"omniroute": {
|
||||
"baseUrl": "http://localhost:20128/v1",
|
||||
"apiKey": "your-omniroute-api-key",
|
||||
"api": "openai-completions",
|
||||
"models": [{ "id": "if/glm-4.7", "name": "glm-4.7" }]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Or use Dashboard:** CLI Tools → OpenClaw → Auto-config
|
||||
|
||||
### Cline / Continue / RooCode
|
||||
|
||||
```
|
||||
Provider: OpenAI Compatible
|
||||
Base URL: http://localhost:20128/v1
|
||||
API Key: [from dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Deployment
|
||||
|
||||
### Global npm install (Recommended)
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
|
||||
# Create config directory
|
||||
mkdir -p ~/.omniroute
|
||||
|
||||
# Create .env file (see .env.example)
|
||||
cp .env.example ~/.omniroute/.env
|
||||
|
||||
# Start server
|
||||
omniroute
|
||||
# Or with custom port:
|
||||
omniroute --port 3000
|
||||
```
|
||||
|
||||
The CLI automatically loads `.env` from `~/.omniroute/.env` or `./.env`.
|
||||
|
||||
### VPS Deployment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/diegosouzapw/OmniRoute.git
|
||||
cd OmniRoute && npm install && npm run build
|
||||
|
||||
export JWT_SECRET="your-secure-secret-change-this"
|
||||
export INITIAL_PASSWORD="your-password"
|
||||
export DATA_DIR="/var/lib/omniroute"
|
||||
export PORT="20128"
|
||||
export HOSTNAME="0.0.0.0"
|
||||
export NODE_ENV="production"
|
||||
export NEXT_PUBLIC_BASE_URL="http://localhost:20128"
|
||||
export API_KEY_SECRET="endpoint-proxy-api-key-secret"
|
||||
|
||||
npm run start
|
||||
# Or: pm2 start npm --name omniroute -- start
|
||||
```
|
||||
|
||||
### PM2 Deployment (Low Memory)
|
||||
|
||||
For servers with limited RAM, use the memory limit option:
|
||||
|
||||
```bash
|
||||
# With 512MB limit (default)
|
||||
pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or with custom memory limit
|
||||
OMNIROUTE_MEMORY_MB=512 pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or using ecosystem.config.js
|
||||
pm2 start ecosystem.config.js
|
||||
```
|
||||
|
||||
Create `ecosystem.config.js`:
|
||||
|
||||
```javascript
|
||||
module.exports = {
|
||||
apps: [
|
||||
{
|
||||
name: "omniroute",
|
||||
script: "npm",
|
||||
args: "start",
|
||||
env: {
|
||||
NODE_ENV: "production",
|
||||
OMNIROUTE_MEMORY_MB: "512",
|
||||
JWT_SECRET: "your-secret",
|
||||
INITIAL_PASSWORD: "your-password",
|
||||
},
|
||||
node_args: "--max-old-space-size=512",
|
||||
max_memory_restart: "300M",
|
||||
},
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
# Build image (default = runner-cli with codex/claude/droid preinstalled)
|
||||
docker build -t omniroute:cli .
|
||||
|
||||
# Portable mode (recommended)
|
||||
docker run -d --name omniroute -p 20128:20128 --env-file ./.env -v omniroute-data:/app/data omniroute:cli
|
||||
```
|
||||
|
||||
For host-integrated mode with CLI binaries, see the Docker section in the main docs.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| ------------------------- | ------------------------------------ | ------------------------------------------------------- |
|
||||
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
|
||||
| `INITIAL_PASSWORD` | `123456` | First login password |
|
||||
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
|
||||
| `PORT` | framework default | Service port (`20128` in examples) |
|
||||
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
|
||||
| `NODE_ENV` | runtime default | Set `production` for deploy |
|
||||
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
|
||||
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
|
||||
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
|
||||
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
|
||||
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
|
||||
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit in MB |
|
||||
| `PROMPT_CACHE_MAX_SIZE` | `50` | Max prompt cache entries |
|
||||
| `SEMANTIC_CACHE_MAX_SIZE` | `100` | Max semantic cache entries |
|
||||
|
||||
For the full environment variable reference, see the [README](../README.md).
|
||||
|
||||
---
|
||||
|
||||
## 📊 Available Models
|
||||
|
||||
<details>
|
||||
<summary><b>View all available models</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** — Pro/Max: `cc/claude-opus-4-6`, `cc/claude-sonnet-4-5-20250929`, `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** — Plus/Pro: `cx/gpt-5.2-codex`, `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** — FREE: `gc/gemini-3-flash-preview`, `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)**: `gh/gpt-5`, `gh/claude-4.5-sonnet`
|
||||
|
||||
**GLM (`glm/`)** — $0.6/1M: `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** — $0.2/1M: `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** — FREE: `if/kimi-k2-thinking`, `if/qwen3-coder-plus`, `if/deepseek-r1`
|
||||
|
||||
**Qwen (`qw/`)** — FREE: `qw/qwen3-coder-plus`, `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** — FREE: `kr/claude-sonnet-4.5`, `kr/claude-haiku-4.5`
|
||||
|
||||
**DeepSeek (`ds/`)**: `ds/deepseek-chat`, `ds/deepseek-reasoner`
|
||||
|
||||
**Groq (`groq/`)**: `groq/llama-3.3-70b-versatile`, `groq/llama-4-maverick-17b-128e-instruct`
|
||||
|
||||
**xAI (`xai/`)**: `xai/grok-4`, `xai/grok-4-0709-fast-reasoning`, `xai/grok-code-mini`
|
||||
|
||||
**Mistral (`mistral/`)**: `mistral/mistral-large-2501`, `mistral/codestral-2501`
|
||||
|
||||
**Perplexity (`pplx/`)**: `pplx/sonar-pro`, `pplx/sonar`
|
||||
|
||||
**Together AI (`together/`)**: `together/meta-llama/Llama-3.3-70B-Instruct-Turbo`
|
||||
|
||||
**Fireworks AI (`fireworks/`)**: `fireworks/accounts/fireworks/models/deepseek-v3p1`
|
||||
|
||||
**Cerebras (`cerebras/`)**: `cerebras/llama-3.3-70b`
|
||||
|
||||
**Cohere (`cohere/`)**: `cohere/command-r-plus-08-2024`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)**: `nvidia/nvidia/llama-3.3-70b-instruct`
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Advanced Features
|
||||
|
||||
### Custom Models
|
||||
|
||||
Add any model ID to any provider without waiting for an app update:
|
||||
|
||||
```bash
|
||||
# Via API
|
||||
curl -X POST http://localhost:20128/api/provider-models \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"provider": "openai", "modelId": "gpt-4.5-preview", "modelName": "GPT-4.5 Preview"}'
|
||||
|
||||
# List: curl http://localhost:20128/api/provider-models?provider=openai
|
||||
# Remove: curl -X DELETE "http://localhost:20128/api/provider-models?provider=openai&model=gpt-4.5-preview"
|
||||
```
|
||||
|
||||
Or use Dashboard: **Providers → [Provider] → Custom Models**.
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
Route requests directly to a specific provider with model validation:
|
||||
|
||||
```bash
|
||||
POST http://localhost:20128/v1/providers/openai/chat/completions
|
||||
POST http://localhost:20128/v1/providers/openai/embeddings
|
||||
POST http://localhost:20128/v1/providers/fireworks/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
### Network Proxy Configuration
|
||||
|
||||
```bash
|
||||
# Set global proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"global": {"type":"http","host":"proxy.example.com","port":"8080"}}'
|
||||
|
||||
# Per-provider proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"providers": {"openai": {"type":"socks5","host":"proxy.example.com","port":"1080"}}}'
|
||||
|
||||
# Test proxy
|
||||
curl -X POST http://localhost:20128/api/settings/proxy/test \
|
||||
-d '{"proxy":{"type":"socks5","host":"proxy.example.com","port":"1080"}}'
|
||||
```
|
||||
|
||||
**Precedence:** Key-specific → Combo-specific → Provider-specific → Global → Environment.
|
||||
|
||||
### Model Catalog API
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/api/models/catalog
|
||||
```
|
||||
|
||||
Returns models grouped by provider with types (`chat`, `embedding`, `image`).
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
- Sync providers, combos, and settings across devices
|
||||
- Automatic background sync with timeout + fail-fast
|
||||
- Prefer server-side `BASE_URL`/`CLOUD_URL` in production
|
||||
|
||||
### LLM Gateway Intelligence (Phase 9)
|
||||
|
||||
- **Semantic Cache** — Auto-caches non-streaming, temperature=0 responses (bypass with `X-OmniRoute-No-Cache: true`)
|
||||
- **Request Idempotency** — Deduplicates requests within 5s via `Idempotency-Key` or `X-Request-Id` header
|
||||
- **Progress Tracking** — Opt-in SSE `event: progress` events via `X-OmniRoute-Progress: true` header
|
||||
|
||||
---
|
||||
|
||||
### Translator Playground
|
||||
|
||||
Access via **Dashboard → Translator**. Debug and visualize how OmniRoute translates API requests between providers.
|
||||
|
||||
| Mode | Purpose |
|
||||
| ---------------- | -------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Select source/target formats, paste a request, and see the translated output instantly |
|
||||
| **Chat Tester** | Send live chat messages through the proxy and inspect the full request/response cycle |
|
||||
| **Test Bench** | Run batch tests across multiple format combinations to verify translation correctness |
|
||||
| **Live Monitor** | Watch real-time translations as requests flow through the proxy |
|
||||
|
||||
**Use cases:**
|
||||
|
||||
- Debug why a specific client/provider combination fails
|
||||
- Verify that thinking tags, tool calls, and system prompts translate correctly
|
||||
- Compare format differences between OpenAI, Claude, Gemini, and Responses API formats
|
||||
|
||||
---
|
||||
|
||||
### Routing Strategies
|
||||
|
||||
Configure via **Dashboard → Settings → Routing**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------------------ | ------------------------------------------------------------------------------------------------ |
|
||||
| **Fill First** | Uses accounts in priority order — primary account handles all requests until unavailable |
|
||||
| **Round Robin** | Cycles through all accounts with a configurable sticky limit (default: 3 calls per account) |
|
||||
| **P2C (Power of Two Choices)** | Picks 2 random accounts and routes to the healthier one — balances load with awareness of health |
|
||||
| **Random** | Randomly selects an account for each request using Fisher-Yates shuffle |
|
||||
| **Least Used** | Routes to the account with the oldest `lastUsedAt` timestamp, distributing traffic evenly |
|
||||
| **Cost Optimized** | Routes to the account with the lowest priority value, optimizing for lowest-cost providers |
|
||||
|
||||
#### Wildcard Model Aliases
|
||||
|
||||
Create wildcard patterns to remap model names:
|
||||
|
||||
```
|
||||
Pattern: claude-sonnet-* → Target: cc/claude-sonnet-4-5-20250929
|
||||
Pattern: gpt-* → Target: gh/gpt-5.1-codex
|
||||
```
|
||||
|
||||
Wildcards support `*` (any characters) and `?` (single character).
|
||||
|
||||
#### Fallback Chains
|
||||
|
||||
Define global fallback chains that apply across all requests:
|
||||
|
||||
```
|
||||
Chain: production-fallback
|
||||
1. cc/claude-opus-4-6
|
||||
2. gh/gpt-5.1-codex
|
||||
3. glm/glm-4.7
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Resilience & Circuit Breakers
|
||||
|
||||
Configure via **Dashboard → Settings → Resilience**.
|
||||
|
||||
OmniRoute implements provider-level resilience with four components:
|
||||
|
||||
1. **Provider Profiles** — Per-provider configuration for:
|
||||
- Failure threshold (how many failures before opening)
|
||||
- Cooldown duration
|
||||
- Rate limit detection sensitivity
|
||||
- Exponential backoff parameters
|
||||
|
||||
2. **Editable Rate Limits** — System-level defaults configurable in the dashboard:
|
||||
- **Requests Per Minute (RPM)** — Maximum requests per minute per account
|
||||
- **Min Time Between Requests** — Minimum gap in milliseconds between requests
|
||||
- **Max Concurrent Requests** — Maximum simultaneous requests per account
|
||||
- Click **Edit** to modify, then **Save** or **Cancel**. Values persist via the resilience API.
|
||||
|
||||
3. **Circuit Breaker** — Tracks failures per provider and automatically opens the circuit when a threshold is reached:
|
||||
- **CLOSED** (Healthy) — Requests flow normally
|
||||
- **OPEN** — Provider is temporarily blocked after repeated failures
|
||||
- **HALF_OPEN** — Testing if provider has recovered
|
||||
|
||||
4. **Policies & Locked Identifiers** — Shows circuit breaker status and locked identifiers with force-unlock capability.
|
||||
|
||||
5. **Rate Limit Auto-Detection** — Monitors `429` and `Retry-After` headers to proactively avoid hitting provider rate limits.
|
||||
|
||||
**Pro Tip:** Use **Reset All** button to clear all circuit breakers and cooldowns when a provider recovers from an outage.
|
||||
|
||||
---
|
||||
|
||||
### Database Export / Import
|
||||
|
||||
Manage database backups in **Dashboard → Settings → System & Storage**.
|
||||
|
||||
| Action | Description |
|
||||
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| **Export Database** | Downloads the current SQLite database as a `.sqlite` file |
|
||||
| **Export All (.tar.gz)** | Downloads a full backup archive including: database, settings, combos, provider connections (no credentials), API key metadata |
|
||||
| **Import Database** | Upload a `.sqlite` file to replace the current database. A pre-import backup is automatically created |
|
||||
|
||||
```bash
|
||||
# API: Export database
|
||||
curl -o backup.sqlite http://localhost:20128/api/db-backups/export
|
||||
|
||||
# API: Export all (full archive)
|
||||
curl -o backup.tar.gz http://localhost:20128/api/db-backups/exportAll
|
||||
|
||||
# API: Import database
|
||||
curl -X POST http://localhost:20128/api/db-backups/import \
|
||||
-F "file=@backup.sqlite"
|
||||
```
|
||||
|
||||
**Import Validation:** The imported file is validated for integrity (SQLite pragma check), required tables (`provider_connections`, `provider_nodes`, `combos`, `api_keys`), and size (max 100MB).
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Migrate OmniRoute between machines
|
||||
- Create external backups for disaster recovery
|
||||
- Share configurations between team members (export all → share archive)
|
||||
|
||||
---
|
||||
|
||||
### Settings Dashboard
|
||||
|
||||
The settings page is organized into 5 tabs for easy navigation:
|
||||
|
||||
| Tab | Contents |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| **Security** | Login/Password settings, IP Access Control, API auth for `/models`, and Provider Blocking |
|
||||
| **Routing** | Global routing strategy (6 options), wildcard model aliases, fallback chains, combo defaults |
|
||||
| **Resilience** | Provider profiles, editable rate limits, circuit breaker status, policies & locked identifiers |
|
||||
| **AI** | Thinking budget configuration, global system prompt injection, prompt cache stats |
|
||||
| **Advanced** | Global proxy configuration (HTTP/SOCKS5) |
|
||||
|
||||
---
|
||||
|
||||
### Costs & Budget Management
|
||||
|
||||
Access via **Dashboard → Costs**.
|
||||
|
||||
| Tab | Purpose |
|
||||
| ----------- | ---------------------------------------------------------------------------------------- |
|
||||
| **Budget** | Set spending limits per API key with daily/weekly/monthly budgets and real-time tracking |
|
||||
| **Pricing** | View and edit model pricing entries — cost per 1K input/output tokens per provider |
|
||||
|
||||
```bash
|
||||
# API: Set a budget
|
||||
curl -X POST http://localhost:20128/api/usage/budget \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"keyId": "key-123", "limit": 50.00, "period": "monthly"}'
|
||||
|
||||
# API: Get current budget status
|
||||
curl http://localhost:20128/api/usage/budget
|
||||
```
|
||||
|
||||
**Cost Tracking:** Every request logs token usage and calculates cost using the pricing table. View breakdowns in **Dashboard → Usage** by provider, model, and API key.
|
||||
|
||||
---
|
||||
|
||||
### Audio Transcription
|
||||
|
||||
OmniRoute supports audio transcription via the OpenAI-compatible endpoint:
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
|
||||
# Example with curl
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@audio.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
Available providers: **Deepgram** (`deepgram/`), **AssemblyAI** (`assemblyai/`).
|
||||
|
||||
Supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
### Combo Balancing Strategies
|
||||
|
||||
Configure per-combo balancing in **Dashboard → Combos → Create/Edit → Strategy**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------ | ------------------------------------------------------------------------ |
|
||||
| **Round-Robin** | Rotates through models sequentially |
|
||||
| **Priority** | Always tries the first model; falls back only on error |
|
||||
| **Random** | Picks a random model from the combo for each request |
|
||||
| **Weighted** | Routes proportionally based on assigned weights per model |
|
||||
| **Least-Used** | Routes to the model with the fewest recent requests (uses combo metrics) |
|
||||
| **Cost-Optimized** | Routes to the cheapest available model (uses pricing table) |
|
||||
|
||||
Global combo defaults can be set in **Dashboard → Settings → Routing → Combo Defaults**.
|
||||
|
||||
---
|
||||
|
||||
### Health Dashboard
|
||||
|
||||
Access via **Dashboard → Health**. Real-time system health overview with 6 cards:
|
||||
|
||||
| Card | What It Shows |
|
||||
| --------------------- | ----------------------------------------------------------- |
|
||||
| **System Status** | Uptime, version, memory usage, data directory |
|
||||
| **Provider Health** | Per-provider circuit breaker state (Closed/Open/Half-Open) |
|
||||
| **Rate Limits** | Active rate limit cooldowns per account with remaining time |
|
||||
| **Active Lockouts** | Providers temporarily blocked by the lockout policy |
|
||||
| **Signature Cache** | Deduplication cache stats (active keys, hit rate) |
|
||||
| **Latency Telemetry** | p50/p95/p99 latency aggregation per provider |
|
||||
|
||||
**Pro Tip:** The Health page auto-refreshes every 10 seconds. Use the circuit breaker card to identify which providers are experiencing issues.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application (Electron)
|
||||
|
||||
OmniRoute is available as a native desktop application for Windows, macOS, and Linux.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# From the electron directory:
|
||||
cd electron
|
||||
npm install
|
||||
|
||||
# Development mode (connect to running Next.js dev server):
|
||||
npm run dev
|
||||
|
||||
# Production mode (uses standalone build):
|
||||
npm start
|
||||
```
|
||||
|
||||
### Building Installers
|
||||
|
||||
```bash
|
||||
cd electron
|
||||
npm run build # Current platform
|
||||
npm run build:win # Windows (.exe NSIS)
|
||||
npm run build:mac # macOS (.dmg universal)
|
||||
npm run build:linux # Linux (.AppImage)
|
||||
```
|
||||
|
||||
Output → `electron/dist-electron/`
|
||||
|
||||
### Key Features
|
||||
|
||||
| Feature | Description |
|
||||
| --------------------------- | ---------------------------------------------------- |
|
||||
| **Server Readiness** | Polls server before showing window (no blank screen) |
|
||||
| **System Tray** | Minimize to tray, change port, quit from tray menu |
|
||||
| **Port Management** | Change server port from tray (auto-restarts server) |
|
||||
| **Content Security Policy** | Restrictive CSP via session headers |
|
||||
| **Single Instance** | Only one app instance can run at a time |
|
||||
| **Offline Mode** | Bundled Next.js server works without internet |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| --------------------- | ------- | -------------------------------- |
|
||||
| `OMNIROUTE_PORT` | `20128` | Server port |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit (64–16384 MB) |
|
||||
|
||||
📖 Full documentation: [`electron/README.md`](../electron/README.md)
|
||||
@@ -0,0 +1,403 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/VM_DEPLOYMENT_GUIDE.md) · 🇪🇸 [es](../es/VM_DEPLOYMENT_GUIDE.md) · 🇫🇷 [fr](../fr/VM_DEPLOYMENT_GUIDE.md) · 🇩🇪 [de](../de/VM_DEPLOYMENT_GUIDE.md) · 🇮🇹 [it](../it/VM_DEPLOYMENT_GUIDE.md) · 🇷🇺 [ru](../ru/VM_DEPLOYMENT_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/VM_DEPLOYMENT_GUIDE.md) · 🇯🇵 [ja](../ja/VM_DEPLOYMENT_GUIDE.md) · 🇰🇷 [ko](../ko/VM_DEPLOYMENT_GUIDE.md) · 🇸🇦 [ar](../ar/VM_DEPLOYMENT_GUIDE.md) · 🇮🇳 [in](../in/VM_DEPLOYMENT_GUIDE.md) · 🇹🇭 [th](../th/VM_DEPLOYMENT_GUIDE.md) · 🇻🇳 [vi](../vi/VM_DEPLOYMENT_GUIDE.md) · 🇮🇩 [id](../id/VM_DEPLOYMENT_GUIDE.md) · 🇲🇾 [ms](../ms/VM_DEPLOYMENT_GUIDE.md) · 🇳🇱 [nl](../nl/VM_DEPLOYMENT_GUIDE.md) · 🇵🇱 [pl](../pl/VM_DEPLOYMENT_GUIDE.md) · 🇸🇪 [sv](../sv/VM_DEPLOYMENT_GUIDE.md) · 🇳🇴 [no](../no/VM_DEPLOYMENT_GUIDE.md) · 🇩🇰 [da](../da/VM_DEPLOYMENT_GUIDE.md) · 🇫🇮 [fi](../fi/VM_DEPLOYMENT_GUIDE.md) · 🇵🇹 [pt](../pt/VM_DEPLOYMENT_GUIDE.md) · 🇷🇴 [ro](../ro/VM_DEPLOYMENT_GUIDE.md) · 🇭🇺 [hu](../hu/VM_DEPLOYMENT_GUIDE.md) · 🇧🇬 [bg](../bg/VM_DEPLOYMENT_GUIDE.md) · 🇸🇰 [sk](../sk/VM_DEPLOYMENT_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/VM_DEPLOYMENT_GUIDE.md) · 🇮🇱 [he](../he/VM_DEPLOYMENT_GUIDE.md) · 🇵🇭 [phi](../phi/VM_DEPLOYMENT_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Guia de Deploy em VM com Cloudflare
|
||||
|
||||
Guia completo para instalar e configurar o OmniRoute em uma VM (VPS) com domínio gerenciado via Cloudflare.
|
||||
|
||||
---
|
||||
|
||||
## Pré-Requisitos
|
||||
|
||||
| Item | Mínimo | Recomendado |
|
||||
| ----------- | ------------------------ | ---------------- |
|
||||
| **CPU** | 1 vCPU | 2 vCPU |
|
||||
| **RAM** | 1 GB | 2 GB |
|
||||
| **Disco** | 10 GB SSD | 25 GB SSD |
|
||||
| **SO** | Ubuntu 22.04 LTS | Ubuntu 24.04 LTS |
|
||||
| **Domínio** | Registrado no Cloudflare | — |
|
||||
| **Docker** | Docker Engine 24+ | Docker 27+ |
|
||||
|
||||
**Providers testados**: Akamai (Linode), DigitalOcean, Vultr, Hetzner, AWS Lightsail.
|
||||
|
||||
---
|
||||
|
||||
## 1. Configurar a VM
|
||||
|
||||
### 1.1 Criar a instância
|
||||
|
||||
No seu provider de VPS preferido:
|
||||
|
||||
- Escolha Ubuntu 24.04 LTS
|
||||
- Selecione o plano mínimo (1 vCPU / 1 GB RAM)
|
||||
- Defina uma senha forte para root ou configure SSH key
|
||||
- Anote o **IP público** (ex: `203.0.113.10`)
|
||||
|
||||
### 1.2 Conectar via SSH
|
||||
|
||||
```bash
|
||||
ssh root@203.0.113.10
|
||||
```
|
||||
|
||||
### 1.3 Atualizar o sistema
|
||||
|
||||
```bash
|
||||
apt update && apt upgrade -y
|
||||
```
|
||||
|
||||
### 1.4 Instalar Docker
|
||||
|
||||
```bash
|
||||
# Instalar dependências
|
||||
apt install -y ca-certificates curl gnupg
|
||||
|
||||
# Adicionar repositório oficial do Docker
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
chmod a+r /etc/apt/keyrings/docker.gpg
|
||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
apt update
|
||||
apt install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
|
||||
```
|
||||
|
||||
### 1.5 Instalar nginx
|
||||
|
||||
```bash
|
||||
apt install -y nginx
|
||||
```
|
||||
|
||||
### 1.6 Configurar Firewall (UFW)
|
||||
|
||||
```bash
|
||||
ufw default deny incoming
|
||||
ufw default allow outgoing
|
||||
ufw allow 22/tcp # SSH
|
||||
ufw allow 80/tcp # HTTP (redirect)
|
||||
ufw allow 443/tcp # HTTPS
|
||||
ufw enable
|
||||
```
|
||||
|
||||
> **Dica**: Para segurança máxima, restrinja as portas 80 e 443 apenas para IPs da Cloudflare. Veja a seção [Segurança Avançada](#segurança-avançada).
|
||||
|
||||
---
|
||||
|
||||
## 2. Instalar o OmniRoute
|
||||
|
||||
### 2.1 Criar diretório de configuração
|
||||
|
||||
```bash
|
||||
mkdir -p /opt/omniroute
|
||||
```
|
||||
|
||||
### 2.2 Criar arquivo de variáveis de ambiente
|
||||
|
||||
```bash
|
||||
cat > /opt/omniroute/.env << 'EOF'
|
||||
# === Segurança ===
|
||||
JWT_SECRET=ALTERE-PARA-CHAVE-SECRETA-UNICA-64-CHARS
|
||||
INITIAL_PASSWORD=SuaSenhaSegura123!
|
||||
API_KEY_SECRET=ALTERE-PARA-OUTRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY=ALTERE-PARA-TERCEIRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY_VERSION=v1
|
||||
MACHINE_ID_SALT=ALTERE-PARA-SALT-UNICO
|
||||
|
||||
# === App ===
|
||||
PORT=20128
|
||||
NODE_ENV=production
|
||||
HOSTNAME=0.0.0.0
|
||||
DATA_DIR=/app/data
|
||||
STORAGE_DRIVER=sqlite
|
||||
ENABLE_REQUEST_LOGS=true
|
||||
AUTH_COOKIE_SECURE=false
|
||||
REQUIRE_API_KEY=false
|
||||
|
||||
# === Domain (altere para seu domínio) ===
|
||||
BASE_URL=https://llms.seudominio.com
|
||||
NEXT_PUBLIC_BASE_URL=https://llms.seudominio.com
|
||||
|
||||
# === Cloud Sync (opcional) ===
|
||||
# CLOUD_URL=https://cloud.omniroute.online
|
||||
# NEXT_PUBLIC_CLOUD_URL=https://cloud.omniroute.online
|
||||
EOF
|
||||
```
|
||||
|
||||
> ⚠️ **IMPORTANTE**: Gere chaves secretas únicas! Use `openssl rand -hex 32` para cada chave.
|
||||
|
||||
### 2.3 Iniciar o container
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### 2.4 Verificar se está rodando
|
||||
|
||||
```bash
|
||||
docker ps | grep omniroute
|
||||
docker logs omniroute --tail 20
|
||||
```
|
||||
|
||||
Deve exibir: `[DB] SQLite database ready` e `listening on port 20128`.
|
||||
|
||||
---
|
||||
|
||||
## 3. Configurar nginx (Reverse Proxy)
|
||||
|
||||
### 3.1 Gerar certificado SSL (Cloudflare Origin)
|
||||
|
||||
No painel da Cloudflare:
|
||||
|
||||
1. Vá em **SSL/TLS → Origin Server**
|
||||
2. Clique **Create Certificate**
|
||||
3. Deixe os padrões (15 anos, \*.seudominio.com)
|
||||
4. Copie o **Origin Certificate** e a **Private Key**
|
||||
|
||||
```bash
|
||||
mkdir -p /etc/nginx/ssl
|
||||
|
||||
# Colar o certificado
|
||||
nano /etc/nginx/ssl/origin.crt
|
||||
|
||||
# Colar a chave privada
|
||||
nano /etc/nginx/ssl/origin.key
|
||||
|
||||
chmod 600 /etc/nginx/ssl/origin.key
|
||||
```
|
||||
|
||||
### 3.2 Configuração do nginx
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/sites-available/omniroute << 'NGINX'
|
||||
# Default server — bloqueia acesso direto por IP
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen [::]:80 default_server;
|
||||
listen 443 ssl default_server;
|
||||
listen [::]:443 ssl default_server;
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
server_name _;
|
||||
return 444;
|
||||
}
|
||||
|
||||
# OmniRoute — HTTPS
|
||||
server {
|
||||
listen 443 ssl;
|
||||
listen [::]:443 ssl;
|
||||
server_name llms.seudominio.com; # Altere para seu domínio
|
||||
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
|
||||
client_max_body_size 100M;
|
||||
|
||||
location / {
|
||||
proxy_pass http://127.0.0.1:20128;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
# SSE (Server-Sent Events) — streaming AI responses
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_read_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
}
|
||||
}
|
||||
|
||||
# HTTP → HTTPS redirect
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
server_name llms.seudominio.com;
|
||||
return 301 https://$server_name$request_uri;
|
||||
}
|
||||
NGINX
|
||||
```
|
||||
|
||||
### 3.3 Ativar e testar
|
||||
|
||||
```bash
|
||||
# Remover config padrão
|
||||
rm -f /etc/nginx/sites-enabled/default
|
||||
|
||||
# Ativar OmniRoute
|
||||
ln -sf /etc/nginx/sites-available/omniroute /etc/nginx/sites-enabled/omniroute
|
||||
|
||||
# Testar e recarregar
|
||||
nginx -t && systemctl reload nginx
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Configurar Cloudflare DNS
|
||||
|
||||
### 4.1 Adicionar registro DNS
|
||||
|
||||
No painel da Cloudflare → DNS:
|
||||
|
||||
| Type | Name | Content | Proxy |
|
||||
| ---- | ------ | ------------------------- | ---------- |
|
||||
| A | `llms` | `203.0.113.10` (IP da VM) | ✅ Proxied |
|
||||
|
||||
### 4.2 Configurar SSL
|
||||
|
||||
Em **SSL/TLS → Overview**:
|
||||
|
||||
- Modo: **Full (Strict)**
|
||||
|
||||
Em **SSL/TLS → Edge Certificates**:
|
||||
|
||||
- Always Use HTTPS: ✅ On
|
||||
- Minimum TLS Version: TLS 1.2
|
||||
- Automatic HTTPS Rewrites: ✅ On
|
||||
|
||||
### 4.3 Testar
|
||||
|
||||
```bash
|
||||
curl -sI https://llms.seudominio.com/health
|
||||
# Deve retornar HTTP/2 200
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Operações e Manutenção
|
||||
|
||||
### Atualizar para nova versão
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
docker stop omniroute && docker rm omniroute
|
||||
docker run -d --name omniroute --restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### Ver logs
|
||||
|
||||
```bash
|
||||
docker logs -f omniroute # Stream em tempo real
|
||||
docker logs omniroute --tail 50 # Últimas 50 linhas
|
||||
```
|
||||
|
||||
### Backup manual do banco
|
||||
|
||||
```bash
|
||||
# Copiar dados do volume para o host
|
||||
docker cp omniroute:/app/data ./backup-$(date +%F)
|
||||
|
||||
# Ou comprimir todo o volume
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine tar czf /backup/omniroute-data-$(date +%F).tar.gz /data
|
||||
```
|
||||
|
||||
### Restaurar de backup
|
||||
|
||||
```bash
|
||||
docker stop omniroute
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine sh -c "rm -rf /data/* && tar xzf /backup/omniroute-data-YYYY-MM-DD.tar.gz -C /"
|
||||
docker start omniroute
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Segurança Avançada
|
||||
|
||||
### Restringir nginx para Cloudflare IPs
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/cloudflare-ips.conf << 'CF'
|
||||
# Cloudflare IPv4 ranges — atualizar periodicamente
|
||||
# https://www.cloudflare.com/ips-v4/
|
||||
set_real_ip_from 173.245.48.0/20;
|
||||
set_real_ip_from 103.21.244.0/22;
|
||||
set_real_ip_from 103.22.200.0/22;
|
||||
set_real_ip_from 103.31.4.0/22;
|
||||
set_real_ip_from 141.101.64.0/18;
|
||||
set_real_ip_from 108.162.192.0/18;
|
||||
set_real_ip_from 190.93.240.0/20;
|
||||
set_real_ip_from 188.114.96.0/20;
|
||||
set_real_ip_from 197.234.240.0/22;
|
||||
set_real_ip_from 198.41.128.0/17;
|
||||
set_real_ip_from 162.158.0.0/15;
|
||||
set_real_ip_from 104.16.0.0/13;
|
||||
set_real_ip_from 104.24.0.0/14;
|
||||
set_real_ip_from 172.64.0.0/13;
|
||||
set_real_ip_from 131.0.72.0/22;
|
||||
real_ip_header CF-Connecting-IP;
|
||||
CF
|
||||
```
|
||||
|
||||
Adicionar no `nginx.conf` dentro do bloco `http {}`:
|
||||
|
||||
```nginx
|
||||
include /etc/nginx/cloudflare-ips.conf;
|
||||
```
|
||||
|
||||
### Install fail2ban
|
||||
|
||||
```bash
|
||||
apt install -y fail2ban
|
||||
systemctl enable fail2ban
|
||||
systemctl start fail2ban
|
||||
|
||||
# Verificar status
|
||||
fail2ban-client status sshd
|
||||
```
|
||||
|
||||
### Bloquear acesso direto na porta do Docker
|
||||
|
||||
```bash
|
||||
# Impedir acesso externo direto à porta 20128
|
||||
iptables -I DOCKER-USER -p tcp --dport 20128 -j DROP
|
||||
iptables -I DOCKER-USER -i lo -p tcp --dport 20128 -j ACCEPT
|
||||
|
||||
# Persistir as regras
|
||||
apt install -y iptables-persistent
|
||||
netfilter-persistent save
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Deploy do Cloud Worker (Opcional)
|
||||
|
||||
Para acesso remoto via Cloudflare Workers (sem expor a VM diretamente):
|
||||
|
||||
```bash
|
||||
# No repositório local
|
||||
cd omnirouteCloud
|
||||
npm install
|
||||
npx wrangler login
|
||||
npx wrangler deploy
|
||||
```
|
||||
|
||||
Ver documentação completa em [omnirouteCloud/README.md](../omnirouteCloud/README.md).
|
||||
|
||||
---
|
||||
|
||||
## Resumo de Portas
|
||||
|
||||
| Porta | Serviço | Acesso |
|
||||
| ----- | ----------- | ----------------------------- |
|
||||
| 22 | SSH | Público (com fail2ban) |
|
||||
| 80 | nginx HTTP | Redirect → HTTPS |
|
||||
| 443 | nginx HTTPS | Via Cloudflare Proxy |
|
||||
| 20128 | OmniRoute | Somente localhost (via nginx) |
|
||||
@@ -0,0 +1,200 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/A2A-SERVER.md) · 🇪🇸 [es](../es/A2A-SERVER.md) · 🇫🇷 [fr](../fr/A2A-SERVER.md) · 🇩🇪 [de](../de/A2A-SERVER.md) · 🇮🇹 [it](../it/A2A-SERVER.md) · 🇷🇺 [ru](../ru/A2A-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/A2A-SERVER.md) · 🇯🇵 [ja](../ja/A2A-SERVER.md) · 🇰🇷 [ko](../ko/A2A-SERVER.md) · 🇸🇦 [ar](../ar/A2A-SERVER.md) · 🇮🇳 [in](../in/A2A-SERVER.md) · 🇹🇭 [th](../th/A2A-SERVER.md) · 🇻🇳 [vi](../vi/A2A-SERVER.md) · 🇮🇩 [id](../id/A2A-SERVER.md) · 🇲🇾 [ms](../ms/A2A-SERVER.md) · 🇳🇱 [nl](../nl/A2A-SERVER.md) · 🇵🇱 [pl](../pl/A2A-SERVER.md) · 🇸🇪 [sv](../sv/A2A-SERVER.md) · 🇳🇴 [no](../no/A2A-SERVER.md) · 🇩🇰 [da](../da/A2A-SERVER.md) · 🇫🇮 [fi](../fi/A2A-SERVER.md) · 🇵🇹 [pt](../pt/A2A-SERVER.md) · 🇷🇴 [ro](../ro/A2A-SERVER.md) · 🇭🇺 [hu](../hu/A2A-SERVER.md) · 🇧🇬 [bg](../bg/A2A-SERVER.md) · 🇸🇰 [sk](../sk/A2A-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/A2A-SERVER.md) · 🇮🇱 [he](../he/A2A-SERVER.md) · 🇵🇭 [phi](../phi/A2A-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute A2A Server Documentation
|
||||
|
||||
> Agent-to-Agent Protocol v0.3 — OmniRoute as an intelligent routing agent
|
||||
|
||||
## Agent Discovery
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/.well-known/agent.json
|
||||
```
|
||||
|
||||
Returns the Agent Card describing OmniRoute's capabilities, skills, and authentication requirements.
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
All `/a2a` requests require an API key via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer YOUR_OMNIROUTE_API_KEY
|
||||
```
|
||||
|
||||
If no API key is configured on the server, authentication is bypassed.
|
||||
|
||||
---
|
||||
|
||||
## JSON-RPC 2.0 Methods
|
||||
|
||||
### `message/send` — Synchronous Execution
|
||||
|
||||
Sends a message to a skill and waits for the complete response.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Write a hello world in Python"}],
|
||||
"metadata": {"model": "auto", "combo": "fast-coding"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"result": {
|
||||
"task": { "id": "uuid", "state": "completed" },
|
||||
"artifacts": [{ "type": "text", "content": "..." }],
|
||||
"metadata": {
|
||||
"routing_explanation": "Selected claude-sonnet via provider \"anthropic\" (latency: 1200ms, cost: $0.003)",
|
||||
"cost_envelope": { "estimated": 0.005, "actual": 0.003, "currency": "USD" },
|
||||
"resilience_trace": [
|
||||
{ "event": "primary_selected", "provider": "anthropic", "timestamp": "..." }
|
||||
],
|
||||
"policy_verdict": { "allowed": true, "reason": "within budget and quota limits" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `message/stream` — SSE Streaming
|
||||
|
||||
Same as `message/send` but returns Server-Sent Events for real-time streaming.
|
||||
|
||||
```bash
|
||||
curl -N -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/stream",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Explain quantum computing"}]
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**SSE Events:**
|
||||
|
||||
```
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"working"},"chunk":{"type":"text","content":"..."}}}
|
||||
|
||||
: heartbeat 2026-03-03T17:00:00Z
|
||||
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"completed"},"metadata":{...}}}
|
||||
```
|
||||
|
||||
### `tasks/get` — Query Task Status
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"tasks/get","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
### `tasks/cancel` — Cancel a Task
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"3","method":"tasks/cancel","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Skills
|
||||
|
||||
| Skill | Description |
|
||||
| :----------------- | :------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `smart-routing` | Routes prompts through OmniRoute's intelligent pipeline. Returns response with routing explanation, cost, and resilience trace. |
|
||||
| `quota-management` | Answers natural-language queries about provider quotas, suggests free combos, and provides quota rankings. |
|
||||
|
||||
---
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
```
|
||||
submitted → working → completed
|
||||
→ failed
|
||||
→ cancelled
|
||||
```
|
||||
|
||||
- Tasks expire after 5 minutes (configurable)
|
||||
- Terminal states: `completed`, `failed`, `cancelled`
|
||||
- Event log tracks every state transition
|
||||
|
||||
---
|
||||
|
||||
## Error Codes
|
||||
|
||||
| Code | Meaning |
|
||||
| :----- | :----------------------------- |
|
||||
| -32700 | Parse error (invalid JSON) |
|
||||
| -32600 | Invalid request / Unauthorized |
|
||||
| -32601 | Method or skill not found |
|
||||
| -32602 | Invalid params |
|
||||
| -32603 | Internal error |
|
||||
|
||||
---
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### Python (requests)
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
resp = requests.post("http://localhost:20128/a2a", json={
|
||||
"jsonrpc": "2.0", "id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}
|
||||
}, headers={"Authorization": "Bearer YOUR_KEY"})
|
||||
|
||||
result = resp.json()["result"]
|
||||
print(result["artifacts"][0]["content"])
|
||||
print(result["metadata"]["routing_explanation"])
|
||||
```
|
||||
|
||||
### TypeScript (fetch)
|
||||
|
||||
```typescript
|
||||
const resp = await fetch("http://localhost:20128/a2a", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: "Bearer YOUR_KEY",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: "1",
|
||||
method: "message/send",
|
||||
params: {
|
||||
skill: "smart-routing",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { result } = await resp.json();
|
||||
console.log(result.metadata.routing_explanation);
|
||||
```
|
||||
@@ -0,0 +1,455 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/API_REFERENCE.md) · 🇪🇸 [es](../es/API_REFERENCE.md) · 🇫🇷 [fr](../fr/API_REFERENCE.md) · 🇩🇪 [de](../de/API_REFERENCE.md) · 🇮🇹 [it](../it/API_REFERENCE.md) · 🇷🇺 [ru](../ru/API_REFERENCE.md) · 🇨🇳 [zh-CN](../zh-CN/API_REFERENCE.md) · 🇯🇵 [ja](../ja/API_REFERENCE.md) · 🇰🇷 [ko](../ko/API_REFERENCE.md) · 🇸🇦 [ar](../ar/API_REFERENCE.md) · 🇮🇳 [in](../in/API_REFERENCE.md) · 🇹🇭 [th](../th/API_REFERENCE.md) · 🇻🇳 [vi](../vi/API_REFERENCE.md) · 🇮🇩 [id](../id/API_REFERENCE.md) · 🇲🇾 [ms](../ms/API_REFERENCE.md) · 🇳🇱 [nl](../nl/API_REFERENCE.md) · 🇵🇱 [pl](../pl/API_REFERENCE.md) · 🇸🇪 [sv](../sv/API_REFERENCE.md) · 🇳🇴 [no](../no/API_REFERENCE.md) · 🇩🇰 [da](../da/API_REFERENCE.md) · 🇫🇮 [fi](../fi/API_REFERENCE.md) · 🇵🇹 [pt](../pt/API_REFERENCE.md) · 🇷🇴 [ro](../ro/API_REFERENCE.md) · 🇭🇺 [hu](../hu/API_REFERENCE.md) · 🇧🇬 [bg](../bg/API_REFERENCE.md) · 🇸🇰 [sk](../sk/API_REFERENCE.md) · 🇺🇦 [uk-UA](../uk-UA/API_REFERENCE.md) · 🇮🇱 [he](../he/API_REFERENCE.md) · 🇵🇭 [phi](../phi/API_REFERENCE.md)
|
||||
|
||||
---
|
||||
|
||||
# API Reference
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](API_REFERENCE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/API_REFERENCE.md) | 🇪🇸 [Español](i18n/es/API_REFERENCE.md) | 🇫🇷 [Français](i18n/fr/API_REFERENCE.md) | 🇮🇹 [Italiano](i18n/it/API_REFERENCE.md) | 🇷🇺 [Русский](i18n/ru/API_REFERENCE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/API_REFERENCE.md) | 🇩🇪 [Deutsch](i18n/de/API_REFERENCE.md) | 🇮🇳 [हिन्दी](i18n/in/API_REFERENCE.md) | 🇹🇭 [ไทย](i18n/th/API_REFERENCE.md) | 🇺🇦 [Українська](i18n/uk-UA/API_REFERENCE.md) | 🇸🇦 [العربية](i18n/ar/API_REFERENCE.md) | 🇯🇵 [日本語](i18n/ja/API_REFERENCE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/API_REFERENCE.md) | 🇧🇬 [Български](i18n/bg/API_REFERENCE.md) | 🇩🇰 [Dansk](i18n/da/API_REFERENCE.md) | 🇫🇮 [Suomi](i18n/fi/API_REFERENCE.md) | 🇮🇱 [עברית](i18n/he/API_REFERENCE.md) | 🇭🇺 [Magyar](i18n/hu/API_REFERENCE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/API_REFERENCE.md) | 🇰🇷 [한국어](i18n/ko/API_REFERENCE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/API_REFERENCE.md) | 🇳🇱 [Nederlands](i18n/nl/API_REFERENCE.md) | 🇳🇴 [Norsk](i18n/no/API_REFERENCE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/API_REFERENCE.md) | 🇷🇴 [Română](i18n/ro/API_REFERENCE.md) | 🇵🇱 [Polski](i18n/pl/API_REFERENCE.md) | 🇸🇰 [Slovenčina](i18n/sk/API_REFERENCE.md) | 🇸🇪 [Svenska](i18n/sv/API_REFERENCE.md) | 🇵🇭 [Filipino](i18n/phi/API_REFERENCE.md)
|
||||
|
||||
Complete reference for all OmniRoute API endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Chat Completions](#chat-completions)
|
||||
- [Embeddings](#embeddings)
|
||||
- [Image Generation](#image-generation)
|
||||
- [List Models](#list-models)
|
||||
- [Compatibility Endpoints](#compatibility-endpoints)
|
||||
- [Semantic Cache](#semantic-cache)
|
||||
- [Dashboard & Management](#dashboard--management)
|
||||
- [Request Processing](#request-processing)
|
||||
- [Authentication](#authentication)
|
||||
|
||||
---
|
||||
|
||||
## Chat Completions
|
||||
|
||||
```bash
|
||||
POST /v1/chat/completions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "cc/claude-opus-4-6",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Write a function to..."}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
```
|
||||
|
||||
### Custom Headers
|
||||
|
||||
| Header | Direction | Description |
|
||||
| ------------------------ | --------- | --------------------------------- |
|
||||
| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache |
|
||||
| `X-OmniRoute-Progress` | Request | Set to `true` for progress events |
|
||||
| `Idempotency-Key` | Request | Dedup key (5s window) |
|
||||
| `X-Request-Id` | Request | Alternative dedup key |
|
||||
| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) |
|
||||
| `X-OmniRoute-Idempotent` | Response | `true` if deduplicated |
|
||||
| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on |
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
```bash
|
||||
POST /v1/embeddings
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "nebius/Qwen/Qwen3-Embedding-8B",
|
||||
"input": "The food was delicious"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: Nebius, OpenAI, Mistral, Together AI, Fireworks, NVIDIA.
|
||||
|
||||
```bash
|
||||
# List all embedding models
|
||||
GET /v1/embeddings
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Image Generation
|
||||
|
||||
```bash
|
||||
POST /v1/images/generations
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "openai/dall-e-3",
|
||||
"prompt": "A beautiful sunset over mountains",
|
||||
"size": "1024x1024"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: OpenAI (DALL-E), xAI (Grok Image), Together AI (FLUX), Fireworks AI.
|
||||
|
||||
```bash
|
||||
# List all image models
|
||||
GET /v1/images/generations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## List Models
|
||||
|
||||
```bash
|
||||
GET /v1/models
|
||||
Authorization: Bearer your-api-key
|
||||
|
||||
→ Returns all chat, embedding, and image models + combos in OpenAI format
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Compatibility Endpoints
|
||||
|
||||
| Method | Path | Format |
|
||||
| ------ | --------------------------- | ---------------------- |
|
||||
| POST | `/v1/chat/completions` | OpenAI |
|
||||
| POST | `/v1/messages` | Anthropic |
|
||||
| POST | `/v1/responses` | OpenAI Responses |
|
||||
| POST | `/v1/embeddings` | OpenAI |
|
||||
| POST | `/v1/images/generations` | OpenAI |
|
||||
| GET | `/v1/models` | OpenAI |
|
||||
| POST | `/v1/messages/count_tokens` | Anthropic |
|
||||
| GET | `/v1beta/models` | Gemini |
|
||||
| POST | `/v1beta/models/{...path}` | Gemini generateContent |
|
||||
| POST | `/v1/api/chat` | Ollama |
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
```bash
|
||||
POST /v1/providers/{provider}/chat/completions
|
||||
POST /v1/providers/{provider}/embeddings
|
||||
POST /v1/providers/{provider}/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
---
|
||||
|
||||
## Semantic Cache
|
||||
|
||||
```bash
|
||||
# Get cache stats
|
||||
GET /api/cache
|
||||
|
||||
# Clear all caches
|
||||
DELETE /api/cache
|
||||
```
|
||||
|
||||
Response example:
|
||||
|
||||
```json
|
||||
{
|
||||
"semanticCache": {
|
||||
"memorySize": 42,
|
||||
"memoryMaxSize": 500,
|
||||
"dbSize": 128,
|
||||
"hitRate": 0.65
|
||||
},
|
||||
"idempotency": {
|
||||
"activeKeys": 3,
|
||||
"windowMs": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dashboard & Management
|
||||
|
||||
### Authentication
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------------- | ------- | --------------------- |
|
||||
| `/api/auth/login` | POST | Login |
|
||||
| `/api/auth/logout` | POST | Logout |
|
||||
| `/api/settings/require-login` | GET/PUT | Toggle login required |
|
||||
|
||||
### Provider Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------- | --------------- | ------------------------ |
|
||||
| `/api/providers` | GET/POST | List / create providers |
|
||||
| `/api/providers/[id]` | GET/PUT/DELETE | Manage a provider |
|
||||
| `/api/providers/[id]/test` | POST | Test provider connection |
|
||||
| `/api/providers/[id]/models` | GET | List provider models |
|
||||
| `/api/providers/validate` | POST | Validate provider config |
|
||||
| `/api/provider-nodes*` | Various | Provider node management |
|
||||
| `/api/provider-models` | GET/POST/DELETE | Custom models |
|
||||
|
||||
### OAuth Flows
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------------- | ------- | ----------------------- |
|
||||
| `/api/oauth/[provider]/[action]` | Various | Provider-specific OAuth |
|
||||
|
||||
### Routing & Config
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------- | -------- | ----------------------------- |
|
||||
| `/api/models/alias` | GET/POST | Model aliases |
|
||||
| `/api/models/catalog` | GET | All models by provider + type |
|
||||
| `/api/combos*` | Various | Combo management |
|
||||
| `/api/keys*` | Various | API key management |
|
||||
| `/api/pricing` | GET | Model pricing |
|
||||
|
||||
### Usage & Analytics
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | -------------------- |
|
||||
| `/api/usage/history` | GET | Usage history |
|
||||
| `/api/usage/logs` | GET | Usage logs |
|
||||
| `/api/usage/request-logs` | GET | Request-level logs |
|
||||
| `/api/usage/[connectionId]` | GET | Per-connection usage |
|
||||
|
||||
### Settings
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------------- | ------- | ---------------------- |
|
||||
| `/api/settings` | GET/PUT | General settings |
|
||||
| `/api/settings/proxy` | GET/PUT | Network proxy config |
|
||||
| `/api/settings/proxy/test` | POST | Test proxy connection |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt |
|
||||
|
||||
### Monitoring
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------ | ---------- | ----------------------- |
|
||||
| `/api/sessions` | GET | Active session tracking |
|
||||
| `/api/rate-limits` | GET | Per-account rate limits |
|
||||
| `/api/monitoring/health` | GET | Health check |
|
||||
| `/api/cache` | GET/DELETE | Cache stats / clear |
|
||||
|
||||
### Backup & Export/Import
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | --------------------------------------- |
|
||||
| `/api/db-backups` | GET | List available backups |
|
||||
| `/api/db-backups` | PUT | Create a manual backup |
|
||||
| `/api/db-backups` | POST | Restore from a specific backup |
|
||||
| `/api/db-backups/export` | GET | Download database as .sqlite file |
|
||||
| `/api/db-backups/import` | POST | Upload .sqlite file to replace database |
|
||||
| `/api/db-backups/exportAll` | GET | Download full backup as .tar.gz archive |
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------- | ------- | --------------------- |
|
||||
| `/api/sync/cloud` | Various | Cloud sync operations |
|
||||
| `/api/sync/initialize` | POST | Initialize sync |
|
||||
| `/api/cloud/*` | Various | Cloud management |
|
||||
|
||||
### CLI Tools
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------------- | ------ | ------------------- |
|
||||
| `/api/cli-tools/claude-settings` | GET | Claude CLI status |
|
||||
| `/api/cli-tools/codex-settings` | GET | Codex CLI status |
|
||||
| `/api/cli-tools/droid-settings` | GET | Droid CLI status |
|
||||
| `/api/cli-tools/openclaw-settings` | GET | OpenClaw CLI status |
|
||||
| `/api/cli-tools/runtime/[toolId]` | GET | Generic CLI runtime |
|
||||
|
||||
CLI responses include: `installed`, `runnable`, `command`, `commandPath`, `runtimeMode`, `reason`.
|
||||
|
||||
### ACP Agents
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------- | ------ | -------------------------------------------------------- |
|
||||
| `/api/acp/agents` | GET | List all detected agents (built-in + custom) with status |
|
||||
| `/api/acp/agents` | POST | Add custom agent or refresh detection cache |
|
||||
| `/api/acp/agents` | DELETE | Remove a custom agent by `id` query param |
|
||||
|
||||
GET response includes `agents[]` (id, name, binary, version, installed, protocol, isCustom) and `summary` (total, installed, notFound, builtIn, custom).
|
||||
|
||||
### Resilience & Rate Limits
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------- | ------- | ------------------------------- |
|
||||
| `/api/resilience` | GET/PUT | Get/update resilience profiles |
|
||||
| `/api/resilience/reset` | POST | Reset circuit breakers |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
| `/api/rate-limit` | GET | Global rate limit configuration |
|
||||
|
||||
### Evals
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------ | -------- | --------------------------------- |
|
||||
| `/api/evals` | GET/POST | List eval suites / run evaluation |
|
||||
|
||||
### Policies
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | --------------- | ----------------------- |
|
||||
| `/api/policies` | GET/POST/DELETE | Manage routing policies |
|
||||
|
||||
### Compliance
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | ----------------------------- |
|
||||
| `/api/compliance/audit-log` | GET | Compliance audit log (last N) |
|
||||
|
||||
### v1beta (Gemini-Compatible)
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------- | ------ | --------------------------------- |
|
||||
| `/v1beta/models` | GET | List models in Gemini format |
|
||||
| `/v1beta/models/{...path}` | POST | Gemini `generateContent` endpoint |
|
||||
|
||||
These endpoints mirror Gemini's API format for clients that expect native Gemini SDK compatibility.
|
||||
|
||||
### Internal / System APIs
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | ------ | ---------------------------------------------------- |
|
||||
| `/api/init` | GET | Application initialization check (used on first run) |
|
||||
| `/api/tags` | GET | Ollama-compatible model tags (for Ollama clients) |
|
||||
| `/api/restart` | POST | Trigger graceful server restart |
|
||||
| `/api/shutdown` | POST | Trigger graceful server shutdown |
|
||||
|
||||
> **Note:** These endpoints are used internally by the system or for Ollama client compatibility. They are not typically called by end users.
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
```
|
||||
|
||||
Transcribe audio files using Deepgram or AssemblyAI.
|
||||
|
||||
**Request:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@recording.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "Hello, this is the transcribed audio content.",
|
||||
"task": "transcribe",
|
||||
"language": "en",
|
||||
"duration": 12.5
|
||||
}
|
||||
```
|
||||
|
||||
**Supported providers:** `deepgram/nova-3`, `assemblyai/best`.
|
||||
|
||||
**Supported formats:** `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
## Ollama Compatibility
|
||||
|
||||
For clients that use Ollama's API format:
|
||||
|
||||
```bash
|
||||
# Chat endpoint (Ollama format)
|
||||
POST /v1/api/chat
|
||||
|
||||
# Model listing (Ollama format)
|
||||
GET /api/tags
|
||||
```
|
||||
|
||||
Requests are automatically translated between Ollama and internal formats.
|
||||
|
||||
---
|
||||
|
||||
## Telemetry
|
||||
|
||||
```bash
|
||||
# Get latency telemetry summary (p50/p95/p99 per provider)
|
||||
GET /api/telemetry/summary
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"claudeCode": { "p50": 245, "p95": 890, "p99": 1200, "count": 150 },
|
||||
"github": { "p50": 180, "p95": 620, "p99": 950, "count": 320 }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Budget
|
||||
|
||||
```bash
|
||||
# Get budget status for all API keys
|
||||
GET /api/usage/budget
|
||||
|
||||
# Set or update a budget
|
||||
POST /api/usage/budget
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"keyId": "key-123",
|
||||
"limit": 50.00,
|
||||
"period": "monthly"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Availability
|
||||
|
||||
```bash
|
||||
# Get real-time model availability across all providers
|
||||
GET /api/models/availability
|
||||
|
||||
# Check availability for a specific model
|
||||
POST /api/models/availability
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-5-20250929"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Request Processing
|
||||
|
||||
1. Client sends request to `/v1/*`
|
||||
2. Route handler calls `handleChat`, `handleEmbedding`, `handleAudioTranscription`, or `handleImageGeneration`
|
||||
3. Model is resolved (direct provider/model or alias/combo)
|
||||
4. Credentials selected from local DB with account availability filtering
|
||||
5. For chat: `handleChatCore` — format detection, translation, cache check, idempotency check
|
||||
6. Provider executor sends upstream request
|
||||
7. Response translated back to client format (chat) or returned as-is (embeddings/images/audio)
|
||||
8. Usage/logging recorded
|
||||
9. Fallback applies on errors according to combo rules
|
||||
|
||||
Full architecture reference: [`ARCHITECTURE.md`](ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
- Dashboard routes (`/dashboard/*`) use `auth_token` cookie
|
||||
- Login uses saved password hash; fallback to `INITIAL_PASSWORD`
|
||||
- `requireLogin` toggleable via `/api/settings/require-login`
|
||||
- `/v1/*` routes optionally require Bearer API key when `REQUIRE_API_KEY=true`
|
||||
@@ -0,0 +1,787 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/ARCHITECTURE.md) · 🇪🇸 [es](../es/ARCHITECTURE.md) · 🇫🇷 [fr](../fr/ARCHITECTURE.md) · 🇩🇪 [de](../de/ARCHITECTURE.md) · 🇮🇹 [it](../it/ARCHITECTURE.md) · 🇷🇺 [ru](../ru/ARCHITECTURE.md) · 🇨🇳 [zh-CN](../zh-CN/ARCHITECTURE.md) · 🇯🇵 [ja](../ja/ARCHITECTURE.md) · 🇰🇷 [ko](../ko/ARCHITECTURE.md) · 🇸🇦 [ar](../ar/ARCHITECTURE.md) · 🇮🇳 [in](../in/ARCHITECTURE.md) · 🇹🇭 [th](../th/ARCHITECTURE.md) · 🇻🇳 [vi](../vi/ARCHITECTURE.md) · 🇮🇩 [id](../id/ARCHITECTURE.md) · 🇲🇾 [ms](../ms/ARCHITECTURE.md) · 🇳🇱 [nl](../nl/ARCHITECTURE.md) · 🇵🇱 [pl](../pl/ARCHITECTURE.md) · 🇸🇪 [sv](../sv/ARCHITECTURE.md) · 🇳🇴 [no](../no/ARCHITECTURE.md) · 🇩🇰 [da](../da/ARCHITECTURE.md) · 🇫🇮 [fi](../fi/ARCHITECTURE.md) · 🇵🇹 [pt](../pt/ARCHITECTURE.md) · 🇷🇴 [ro](../ro/ARCHITECTURE.md) · 🇭🇺 [hu](../hu/ARCHITECTURE.md) · 🇧🇬 [bg](../bg/ARCHITECTURE.md) · 🇸🇰 [sk](../sk/ARCHITECTURE.md) · 🇺🇦 [uk-UA](../uk-UA/ARCHITECTURE.md) · 🇮🇱 [he](../he/ARCHITECTURE.md) · 🇵🇭 [phi](../phi/ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Architecture
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](ARCHITECTURE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/ARCHITECTURE.md) | 🇪🇸 [Español](i18n/es/ARCHITECTURE.md) | 🇫🇷 [Français](i18n/fr/ARCHITECTURE.md) | 🇮🇹 [Italiano](i18n/it/ARCHITECTURE.md) | 🇷🇺 [Русский](i18n/ru/ARCHITECTURE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/ARCHITECTURE.md) | 🇩🇪 [Deutsch](i18n/de/ARCHITECTURE.md) | 🇮🇳 [हिन्दी](i18n/in/ARCHITECTURE.md) | 🇹🇭 [ไทย](i18n/th/ARCHITECTURE.md) | 🇺🇦 [Українська](i18n/uk-UA/ARCHITECTURE.md) | 🇸🇦 [العربية](i18n/ar/ARCHITECTURE.md) | 🇯🇵 [日本語](i18n/ja/ARCHITECTURE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/ARCHITECTURE.md) | 🇧🇬 [Български](i18n/bg/ARCHITECTURE.md) | 🇩🇰 [Dansk](i18n/da/ARCHITECTURE.md) | 🇫🇮 [Suomi](i18n/fi/ARCHITECTURE.md) | 🇮🇱 [עברית](i18n/he/ARCHITECTURE.md) | 🇭🇺 [Magyar](i18n/hu/ARCHITECTURE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/ARCHITECTURE.md) | 🇰🇷 [한국어](i18n/ko/ARCHITECTURE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/ARCHITECTURE.md) | 🇳🇱 [Nederlands](i18n/nl/ARCHITECTURE.md) | 🇳🇴 [Norsk](i18n/no/ARCHITECTURE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/ARCHITECTURE.md) | 🇷🇴 [Română](i18n/ro/ARCHITECTURE.md) | 🇵🇱 [Polski](i18n/pl/ARCHITECTURE.md) | 🇸🇰 [Slovenčina](i18n/sk/ARCHITECTURE.md) | 🇸🇪 [Svenska](i18n/sv/ARCHITECTURE.md) | 🇵🇭 [Filipino](i18n/phi/ARCHITECTURE.md)
|
||||
|
||||
_Last updated: 2026-03-04_
|
||||
|
||||
## Executive Summary
|
||||
|
||||
OmniRoute is a local AI routing gateway and dashboard built on Next.js.
|
||||
It provides a single OpenAI-compatible endpoint (`/v1/*`) and routes traffic across multiple upstream providers with translation, fallback, token refresh, and usage tracking.
|
||||
|
||||
Core capabilities:
|
||||
|
||||
- OpenAI-compatible API surface for CLI/tools (28 providers)
|
||||
- Request/response translation across provider formats
|
||||
- Model combo fallback (multi-model sequence)
|
||||
- Account-level fallback (multi-account per provider)
|
||||
- OAuth + API-key provider connection management
|
||||
- Embedding generation via `/v1/embeddings` (6 providers, 9 models)
|
||||
- Image generation via `/v1/images/generations` (4 providers, 9 models)
|
||||
- Think tag parsing (`<think>...</think>`) for reasoning models
|
||||
- Response sanitization for strict OpenAI SDK compatibility
|
||||
- Role normalization (developer→system, system→user) for cross-provider compatibility
|
||||
- Structured output conversion (json_schema → Gemini responseSchema)
|
||||
- Local persistence for providers, keys, aliases, combos, settings, pricing
|
||||
- Usage/cost tracking and request logging
|
||||
- Optional cloud sync for multi-device/state sync
|
||||
- IP allowlist/blocklist for API access control
|
||||
- Thinking budget management (passthrough/auto/custom/adaptive)
|
||||
- Global system prompt injection
|
||||
- Session tracking and fingerprinting
|
||||
- Per-account enhanced rate limiting with provider-specific profiles
|
||||
- Circuit breaker pattern for provider resilience
|
||||
- Anti-thundering herd protection with mutex locking
|
||||
- Signature-based request deduplication cache
|
||||
- Domain layer: model availability, cost rules, fallback policy, lockout policy
|
||||
- Domain state persistence (SQLite write-through cache for fallbacks, budgets, lockouts, circuit breakers)
|
||||
- Policy engine for centralized request evaluation (lockout → budget → fallback)
|
||||
- Request telemetry with p50/p95/p99 latency aggregation
|
||||
- Correlation ID (X-Request-Id) for end-to-end tracing
|
||||
- Compliance audit logging with opt-out per API key
|
||||
- Eval framework for LLM quality assurance
|
||||
- Resilience UI dashboard with real-time circuit breaker status
|
||||
- Modular OAuth providers (12 individual modules under `src/lib/oauth/providers/`)
|
||||
|
||||
Primary runtime model:
|
||||
|
||||
- Next.js app routes under `src/app/api/*` implement both dashboard APIs and compatibility APIs
|
||||
- A shared SSE/routing core in `src/sse/*` + `open-sse/*` handles provider execution, translation, streaming, fallback, and usage
|
||||
|
||||
## Scope and Boundaries
|
||||
|
||||
### In Scope
|
||||
|
||||
- Local gateway runtime
|
||||
- Dashboard management APIs
|
||||
- Provider authentication and token refresh
|
||||
- Request translation and SSE streaming
|
||||
- Local state + usage persistence
|
||||
- Optional cloud sync orchestration
|
||||
|
||||
### Out of Scope
|
||||
|
||||
- Cloud service implementation behind `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Provider SLA/control plane outside local process
|
||||
- External CLI binaries themselves (Claude CLI, Codex CLI, etc.)
|
||||
|
||||
## High-Level System Context
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Clients[Developer Clients]
|
||||
C1[Claude Code]
|
||||
C2[Codex CLI]
|
||||
C3[OpenClaw / Droid / Cline / Continue / Roo]
|
||||
C4[Custom OpenAI-compatible clients]
|
||||
BROWSER[Browser Dashboard]
|
||||
end
|
||||
|
||||
subgraph Router[OmniRoute Local Process]
|
||||
API[V1 Compatibility API\n/v1/*]
|
||||
DASH[Dashboard + Management API\n/api/*]
|
||||
CORE[SSE + Translation Core\nopen-sse + src/sse]
|
||||
DB[(storage.sqlite)]
|
||||
UDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph Upstreams[Upstream Providers]
|
||||
P1[OAuth Providers\nClaude/Codex/Gemini/Qwen/iFlow/GitHub/Kiro/Cursor/Antigravity]
|
||||
P2[API Key Providers\nOpenAI/Anthropic/OpenRouter/GLM/Kimi/MiniMax\nDeepSeek/Groq/xAI/Mistral/Perplexity\nTogether/Fireworks/Cerebras/Cohere/NVIDIA]
|
||||
P3[Compatible Nodes\nOpenAI-compatible / Anthropic-compatible]
|
||||
end
|
||||
|
||||
subgraph Cloud[Optional Cloud Sync]
|
||||
CLOUD[Cloud Sync Endpoint\nNEXT_PUBLIC_CLOUD_URL]
|
||||
end
|
||||
|
||||
C1 --> API
|
||||
C2 --> API
|
||||
C3 --> API
|
||||
C4 --> API
|
||||
BROWSER --> DASH
|
||||
|
||||
API --> CORE
|
||||
DASH --> DB
|
||||
CORE --> DB
|
||||
CORE --> UDB
|
||||
|
||||
CORE --> P1
|
||||
CORE --> P2
|
||||
CORE --> P3
|
||||
|
||||
DASH --> CLOUD
|
||||
```
|
||||
|
||||
## Core Runtime Components
|
||||
|
||||
## 1) API and Routing Layer (Next.js App Routes)
|
||||
|
||||
Main directories:
|
||||
|
||||
- `src/app/api/v1/*` and `src/app/api/v1beta/*` for compatibility APIs
|
||||
- `src/app/api/*` for management/configuration APIs
|
||||
- Next rewrites in `next.config.mjs` map `/v1/*` to `/api/v1/*`
|
||||
|
||||
Important compatibility routes:
|
||||
|
||||
- `src/app/api/v1/chat/completions/route.ts`
|
||||
- `src/app/api/v1/messages/route.ts`
|
||||
- `src/app/api/v1/responses/route.ts`
|
||||
- `src/app/api/v1/models/route.ts` — includes custom models with `custom: true`
|
||||
- `src/app/api/v1/embeddings/route.ts` — embedding generation (6 providers)
|
||||
- `src/app/api/v1/images/generations/route.ts` — image generation (4+ providers incl. Antigravity/Nebius)
|
||||
- `src/app/api/v1/messages/count_tokens/route.ts`
|
||||
- `src/app/api/v1/providers/[provider]/chat/completions/route.ts` — dedicated per-provider chat
|
||||
- `src/app/api/v1/providers/[provider]/embeddings/route.ts` — dedicated per-provider embeddings
|
||||
- `src/app/api/v1/providers/[provider]/images/generations/route.ts` — dedicated per-provider images
|
||||
- `src/app/api/v1beta/models/route.ts`
|
||||
- `src/app/api/v1beta/models/[...path]/route.ts`
|
||||
|
||||
Management domains:
|
||||
|
||||
- Auth/settings: `src/app/api/auth/*`, `src/app/api/settings/*`
|
||||
- Providers/connections: `src/app/api/providers*`
|
||||
- Provider nodes: `src/app/api/provider-nodes*`
|
||||
- Custom models: `src/app/api/provider-models` (GET/POST/DELETE)
|
||||
- Model catalog: `src/app/api/models/route.ts` (GET)
|
||||
- Proxy config: `src/app/api/settings/proxy` (GET/PUT/DELETE) + `src/app/api/settings/proxy/test` (POST)
|
||||
- OAuth: `src/app/api/oauth/*`
|
||||
- Keys/aliases/combos/pricing: `src/app/api/keys*`, `src/app/api/models/alias`, `src/app/api/combos*`, `src/app/api/pricing`
|
||||
- Usage: `src/app/api/usage/*`
|
||||
- Sync/cloud: `src/app/api/sync/*`, `src/app/api/cloud/*`
|
||||
- CLI tooling helpers: `src/app/api/cli-tools/*`
|
||||
- IP filter: `src/app/api/settings/ip-filter` (GET/PUT)
|
||||
- Thinking budget: `src/app/api/settings/thinking-budget` (GET/PUT)
|
||||
- System prompt: `src/app/api/settings/system-prompt` (GET/PUT)
|
||||
- Sessions: `src/app/api/sessions` (GET)
|
||||
- Rate limits: `src/app/api/rate-limits` (GET)
|
||||
- Resilience: `src/app/api/resilience` (GET/PATCH) — provider profiles, circuit breaker, rate limit state
|
||||
- Resilience reset: `src/app/api/resilience/reset` (POST) — reset breakers + cooldowns
|
||||
- Cache stats: `src/app/api/cache/stats` (GET/DELETE)
|
||||
- Model availability: `src/app/api/models/availability` (GET/POST)
|
||||
- Telemetry: `src/app/api/telemetry/summary` (GET)
|
||||
- Budget: `src/app/api/usage/budget` (GET/POST)
|
||||
- Fallback chains: `src/app/api/fallback/chains` (GET/POST/DELETE)
|
||||
- Compliance audit: `src/app/api/compliance/audit-log` (GET)
|
||||
- Evals: `src/app/api/evals` (GET/POST), `src/app/api/evals/[suiteId]` (GET)
|
||||
- Policies: `src/app/api/policies` (GET/POST)
|
||||
|
||||
## 2) SSE + Translation Core
|
||||
|
||||
Main flow modules:
|
||||
|
||||
- Entry: `src/sse/handlers/chat.ts`
|
||||
- Core orchestration: `open-sse/handlers/chatCore.ts`
|
||||
- Provider execution adapters: `open-sse/executors/*`
|
||||
- Format detection/provider config: `open-sse/services/provider.ts`
|
||||
- Model parse/resolve: `src/sse/services/model.ts`, `open-sse/services/model.ts`
|
||||
- Account fallback logic: `open-sse/services/accountFallback.ts`
|
||||
- Translation registry: `open-sse/translator/index.ts`
|
||||
- Stream transformations: `open-sse/utils/stream.ts`, `open-sse/utils/streamHandler.ts`
|
||||
- Usage extraction/normalization: `open-sse/utils/usageTracking.ts`
|
||||
- Think tag parser: `open-sse/utils/thinkTagParser.ts`
|
||||
- Embedding handler: `open-sse/handlers/embeddings.ts`
|
||||
- Embedding provider registry: `open-sse/config/embeddingRegistry.ts`
|
||||
- Image generation handler: `open-sse/handlers/imageGeneration.ts`
|
||||
- Image provider registry: `open-sse/config/imageRegistry.ts`
|
||||
- Response sanitization: `open-sse/handlers/responseSanitizer.ts`
|
||||
- Role normalization: `open-sse/services/roleNormalizer.ts`
|
||||
|
||||
Services (business logic):
|
||||
|
||||
- Account selection/scoring: `open-sse/services/accountSelector.ts`
|
||||
- Context lifecycle management: `open-sse/services/contextManager.ts`
|
||||
- IP filter enforcement: `open-sse/services/ipFilter.ts`
|
||||
- Session tracking: `open-sse/services/sessionManager.ts`
|
||||
- Request deduplication: `open-sse/services/signatureCache.ts`
|
||||
- System prompt injection: `open-sse/services/systemPrompt.ts`
|
||||
- Thinking budget management: `open-sse/services/thinkingBudget.ts`
|
||||
- Wildcard model routing: `open-sse/services/wildcardRouter.ts`
|
||||
- Rate limit management: `open-sse/services/rateLimitManager.ts`
|
||||
- Circuit breaker: `open-sse/services/circuitBreaker.ts`
|
||||
|
||||
Domain layer modules:
|
||||
|
||||
- Model availability: `src/lib/domain/modelAvailability.ts`
|
||||
- Cost rules/budgets: `src/lib/domain/costRules.ts`
|
||||
- Fallback policy: `src/lib/domain/fallbackPolicy.ts`
|
||||
- Combo resolver: `src/lib/domain/comboResolver.ts`
|
||||
- Lockout policy: `src/lib/domain/lockoutPolicy.ts`
|
||||
- Policy engine: `src/domain/policyEngine.ts` — centralized lockout → budget → fallback evaluation
|
||||
- Error codes catalog: `src/lib/domain/errorCodes.ts`
|
||||
- Request ID: `src/lib/domain/requestId.ts`
|
||||
- Fetch timeout: `src/lib/domain/fetchTimeout.ts`
|
||||
- Request telemetry: `src/lib/domain/requestTelemetry.ts`
|
||||
- Compliance/audit: `src/lib/domain/compliance/index.ts`
|
||||
- Eval runner: `src/lib/domain/evalRunner.ts`
|
||||
- Domain state persistence: `src/lib/db/domainState.ts` — SQLite CRUD for fallback chains, budgets, cost history, lockout state, circuit breakers
|
||||
|
||||
OAuth provider modules (12 individual files under `src/lib/oauth/providers/`):
|
||||
|
||||
- Registry index: `src/lib/oauth/providers/index.ts`
|
||||
- Individual providers: `claude.ts`, `codex.ts`, `gemini.ts`, `antigravity.ts`, `iflow.ts`, `qwen.ts`, `kimi-coding.ts`, `github.ts`, `kiro.ts`, `cursor.ts`, `kilocode.ts`, `cline.ts`
|
||||
- Thin wrapper: `src/lib/oauth/providers.ts` — re-exports from individual modules
|
||||
|
||||
## 3) Persistence Layer
|
||||
|
||||
Primary state DB (SQLite):
|
||||
|
||||
- Core infra: `src/lib/db/core.ts` (better-sqlite3, migrations, WAL)
|
||||
- Re-export facade: `src/lib/localDb.ts` (thin compatibility layer for callers)
|
||||
- file: `${DATA_DIR}/storage.sqlite` (or `$XDG_CONFIG_HOME/omniroute/storage.sqlite` when set, else `~/.omniroute/storage.sqlite`)
|
||||
- entities (tables + KV namespaces): providerConnections, providerNodes, modelAliases, combos, apiKeys, settings, pricing, **customModels**, **proxyConfig**, **ipFilter**, **thinkingBudget**, **systemPrompt**
|
||||
|
||||
Usage persistence:
|
||||
|
||||
- facade: `src/lib/usageDb.ts` (decomposed modules in `src/lib/usage/*`)
|
||||
- SQLite tables in `storage.sqlite`: `usage_history`, `call_logs`, `proxy_logs`
|
||||
- optional file artifacts remain for compatibility/debug (`${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`, `<repo>/logs/...`)
|
||||
- legacy JSON files are migrated to SQLite by startup migrations when present
|
||||
|
||||
Domain State DB (SQLite):
|
||||
|
||||
- `src/lib/db/domainState.ts` — CRUD operations for domain state
|
||||
- Tables (created in `src/lib/db/core.ts`): `domain_fallback_chains`, `domain_budgets`, `domain_cost_history`, `domain_lockout_state`, `domain_circuit_breakers`
|
||||
- Write-through cache pattern: in-memory Maps are authoritative at runtime; mutations are written synchronously to SQLite; state is restored from DB on cold start
|
||||
|
||||
## 4) Auth + Security Surfaces
|
||||
|
||||
- Dashboard cookie auth: `src/proxy.ts`, `src/app/api/auth/login/route.ts`
|
||||
- API key generation/verification: `src/shared/utils/apiKey.ts`
|
||||
- Provider secrets persisted in `providerConnections` entries
|
||||
- Outbound proxy support via `open-sse/utils/proxyFetch.ts` (env vars) and `open-sse/utils/networkProxy.ts` (configurable per-provider or global)
|
||||
|
||||
## 5) Cloud Sync
|
||||
|
||||
- Scheduler init: `src/lib/initCloudSync.ts`, `src/shared/services/initializeCloudSync.ts`
|
||||
- Periodic task: `src/shared/services/cloudSyncScheduler.ts`
|
||||
- Control route: `src/app/api/sync/cloud/route.ts`
|
||||
|
||||
## Request Lifecycle (`/v1/chat/completions`)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant Client as CLI/SDK Client
|
||||
participant Route as /api/v1/chat/completions
|
||||
participant Chat as src/sse/handlers/chat
|
||||
participant Core as open-sse/handlers/chatCore
|
||||
participant Model as Model Resolver
|
||||
participant Auth as Credential Selector
|
||||
participant Exec as Provider Executor
|
||||
participant Prov as Upstream Provider
|
||||
participant Stream as Stream Translator
|
||||
participant Usage as usageDb
|
||||
|
||||
Client->>Route: POST /v1/chat/completions
|
||||
Route->>Chat: handleChat(request)
|
||||
Chat->>Model: parse/resolve model or combo
|
||||
|
||||
alt Combo model
|
||||
Chat->>Chat: iterate combo models (handleComboChat)
|
||||
end
|
||||
|
||||
Chat->>Auth: getProviderCredentials(provider)
|
||||
Auth-->>Chat: active account + tokens/api key
|
||||
|
||||
Chat->>Core: handleChatCore(body, modelInfo, credentials)
|
||||
Core->>Core: detect source format
|
||||
Core->>Core: translate request to target format
|
||||
Core->>Exec: execute(provider, transformedBody)
|
||||
Exec->>Prov: upstream API call
|
||||
Prov-->>Exec: SSE/JSON response
|
||||
Exec-->>Core: response + metadata
|
||||
|
||||
alt 401/403
|
||||
Core->>Exec: refreshCredentials()
|
||||
Exec-->>Core: updated tokens
|
||||
Core->>Exec: retry request
|
||||
end
|
||||
|
||||
Core->>Stream: translate/normalize stream to client format
|
||||
Stream-->>Client: SSE chunks / JSON response
|
||||
|
||||
Stream->>Usage: extract usage + persist history/log
|
||||
```
|
||||
|
||||
## Combo + Account Fallback Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A[Incoming model string] --> B{Is combo name?}
|
||||
B -- Yes --> C[Load combo models sequence]
|
||||
B -- No --> D[Single model path]
|
||||
|
||||
C --> E[Try model N]
|
||||
E --> F[Resolve provider/model]
|
||||
D --> F
|
||||
|
||||
F --> G[Select account credentials]
|
||||
G --> H{Credentials available?}
|
||||
H -- No --> I[Return provider unavailable]
|
||||
H -- Yes --> J[Execute request]
|
||||
|
||||
J --> K{Success?}
|
||||
K -- Yes --> L[Return response]
|
||||
K -- No --> M{Fallback-eligible error?}
|
||||
|
||||
M -- No --> N[Return error]
|
||||
M -- Yes --> O[Mark account unavailable cooldown]
|
||||
O --> P{Another account for provider?}
|
||||
P -- Yes --> G
|
||||
P -- No --> Q{In combo with next model?}
|
||||
Q -- Yes --> E
|
||||
Q -- No --> R[Return all unavailable]
|
||||
```
|
||||
|
||||
Fallback decisions are driven by `open-sse/services/accountFallback.ts` using status codes and error-message heuristics.
|
||||
|
||||
## OAuth Onboarding and Token Refresh Lifecycle
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Dashboard UI
|
||||
participant OAuth as /api/oauth/[provider]/[action]
|
||||
participant ProvAuth as Provider Auth Server
|
||||
participant DB as localDb
|
||||
participant Test as /api/providers/[id]/test
|
||||
participant Exec as Provider Executor
|
||||
|
||||
UI->>OAuth: GET authorize or device-code
|
||||
OAuth->>ProvAuth: create auth/device flow
|
||||
ProvAuth-->>OAuth: auth URL or device code payload
|
||||
OAuth-->>UI: flow data
|
||||
|
||||
UI->>OAuth: POST exchange or poll
|
||||
OAuth->>ProvAuth: token exchange/poll
|
||||
ProvAuth-->>OAuth: access/refresh tokens
|
||||
OAuth->>DB: createProviderConnection(oauth data)
|
||||
OAuth-->>UI: success + connection id
|
||||
|
||||
UI->>Test: POST /api/providers/[id]/test
|
||||
Test->>Exec: validate credentials / optional refresh
|
||||
Exec-->>Test: valid or refreshed token info
|
||||
Test->>DB: update status/tokens/errors
|
||||
Test-->>UI: validation result
|
||||
```
|
||||
|
||||
Refresh during live traffic is executed inside `open-sse/handlers/chatCore.ts` via executor `refreshCredentials()`.
|
||||
|
||||
## Cloud Sync Lifecycle (Enable / Sync / Disable)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Endpoint Page UI
|
||||
participant Sync as /api/sync/cloud
|
||||
participant DB as localDb
|
||||
participant Cloud as External Cloud Sync
|
||||
participant Claude as ~/.claude/settings.json
|
||||
|
||||
UI->>Sync: POST action=enable
|
||||
Sync->>DB: set cloudEnabled=true
|
||||
Sync->>DB: ensure API key exists
|
||||
Sync->>Cloud: POST /sync/{machineId} (providers/aliases/combos/keys)
|
||||
Cloud-->>Sync: sync result
|
||||
Sync->>Cloud: GET /{machineId}/v1/verify
|
||||
Sync-->>UI: enabled + verification status
|
||||
|
||||
UI->>Sync: POST action=sync
|
||||
Sync->>Cloud: POST /sync/{machineId}
|
||||
Cloud-->>Sync: remote data
|
||||
Sync->>DB: update newer local tokens/status
|
||||
Sync-->>UI: synced
|
||||
|
||||
UI->>Sync: POST action=disable
|
||||
Sync->>DB: set cloudEnabled=false
|
||||
Sync->>Cloud: DELETE /sync/{machineId}
|
||||
Sync->>Claude: switch ANTHROPIC_BASE_URL back to local (if needed)
|
||||
Sync-->>UI: disabled
|
||||
```
|
||||
|
||||
Periodic sync is triggered by `CloudSyncScheduler` when cloud is enabled.
|
||||
|
||||
## Data Model and Storage Map
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
SETTINGS ||--o{ PROVIDER_CONNECTION : controls
|
||||
PROVIDER_NODE ||--o{ PROVIDER_CONNECTION : backs_compatible_provider
|
||||
PROVIDER_CONNECTION ||--o{ USAGE_ENTRY : emits_usage
|
||||
|
||||
SETTINGS {
|
||||
boolean cloudEnabled
|
||||
number stickyRoundRobinLimit
|
||||
boolean requireLogin
|
||||
string password_hash
|
||||
string fallbackStrategy
|
||||
json rateLimitDefaults
|
||||
json providerProfiles
|
||||
}
|
||||
|
||||
PROVIDER_CONNECTION {
|
||||
string id
|
||||
string provider
|
||||
string authType
|
||||
string name
|
||||
number priority
|
||||
boolean isActive
|
||||
string apiKey
|
||||
string accessToken
|
||||
string refreshToken
|
||||
string expiresAt
|
||||
string testStatus
|
||||
string lastError
|
||||
string rateLimitedUntil
|
||||
json providerSpecificData
|
||||
}
|
||||
|
||||
PROVIDER_NODE {
|
||||
string id
|
||||
string type
|
||||
string name
|
||||
string prefix
|
||||
string apiType
|
||||
string baseUrl
|
||||
}
|
||||
|
||||
MODEL_ALIAS {
|
||||
string alias
|
||||
string targetModel
|
||||
}
|
||||
|
||||
COMBO {
|
||||
string id
|
||||
string name
|
||||
string[] models
|
||||
}
|
||||
|
||||
API_KEY {
|
||||
string id
|
||||
string name
|
||||
string key
|
||||
string machineId
|
||||
}
|
||||
|
||||
USAGE_ENTRY {
|
||||
string provider
|
||||
string model
|
||||
number prompt_tokens
|
||||
number completion_tokens
|
||||
string connectionId
|
||||
string timestamp
|
||||
}
|
||||
|
||||
CUSTOM_MODEL {
|
||||
string id
|
||||
string name
|
||||
string providerId
|
||||
}
|
||||
|
||||
PROXY_CONFIG {
|
||||
string global
|
||||
json providers
|
||||
}
|
||||
|
||||
IP_FILTER {
|
||||
string mode
|
||||
string[] allowlist
|
||||
string[] blocklist
|
||||
}
|
||||
|
||||
THINKING_BUDGET {
|
||||
string mode
|
||||
number customBudget
|
||||
string effortLevel
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT {
|
||||
boolean enabled
|
||||
string prompt
|
||||
string position
|
||||
}
|
||||
```
|
||||
|
||||
Physical storage files:
|
||||
|
||||
- primary runtime DB: `${DATA_DIR}/storage.sqlite`
|
||||
- request log lines: `${DATA_DIR}/log.txt` (compat/debug artifact)
|
||||
- structured call payload archives: `${DATA_DIR}/call_logs/`
|
||||
- optional translator/request debug sessions: `<repo>/logs/...`
|
||||
|
||||
## Deployment Topology
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph LocalHost[Developer Host]
|
||||
CLI[CLI Tools]
|
||||
Browser[Dashboard Browser]
|
||||
end
|
||||
|
||||
subgraph ContainerOrProcess[OmniRoute Runtime]
|
||||
Next[Next.js Server\nPORT=20128]
|
||||
Core[SSE Core + Executors]
|
||||
MainDB[(storage.sqlite)]
|
||||
UsageDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph External[External Services]
|
||||
Providers[AI Providers]
|
||||
SyncCloud[Cloud Sync Service]
|
||||
end
|
||||
|
||||
CLI --> Next
|
||||
Browser --> Next
|
||||
Next --> Core
|
||||
Next --> MainDB
|
||||
Core --> MainDB
|
||||
Core --> UsageDB
|
||||
Core --> Providers
|
||||
Next --> SyncCloud
|
||||
```
|
||||
|
||||
## Module Mapping (Decision-Critical)
|
||||
|
||||
### Route and API Modules
|
||||
|
||||
- `src/app/api/v1/*`, `src/app/api/v1beta/*`: compatibility APIs
|
||||
- `src/app/api/v1/providers/[provider]/*`: dedicated per-provider routes (chat, embeddings, images)
|
||||
- `src/app/api/providers*`: provider CRUD, validation, testing
|
||||
- `src/app/api/provider-nodes*`: custom compatible node management
|
||||
- `src/app/api/provider-models`: custom model management (CRUD)
|
||||
- `src/app/api/models/route.ts`: model catalog API (aliases + custom models)
|
||||
- `src/app/api/oauth/*`: OAuth/device-code flows
|
||||
- `src/app/api/keys*`: local API key lifecycle
|
||||
- `src/app/api/models/alias`: alias management
|
||||
- `src/app/api/combos*`: fallback combo management
|
||||
- `src/app/api/pricing`: pricing overrides for cost calculation
|
||||
- `src/app/api/settings/proxy`: proxy configuration (GET/PUT/DELETE)
|
||||
- `src/app/api/settings/proxy/test`: outbound proxy connectivity test (POST)
|
||||
- `src/app/api/usage/*`: usage and logs APIs
|
||||
- `src/app/api/sync/*` + `src/app/api/cloud/*`: cloud sync and cloud-facing helpers
|
||||
- `src/app/api/cli-tools/*`: local CLI config writers/checkers
|
||||
- `src/app/api/settings/ip-filter`: IP allowlist/blocklist (GET/PUT)
|
||||
- `src/app/api/settings/thinking-budget`: thinking token budget config (GET/PUT)
|
||||
- `src/app/api/settings/system-prompt`: global system prompt (GET/PUT)
|
||||
- `src/app/api/sessions`: active session listing (GET)
|
||||
- `src/app/api/rate-limits`: per-account rate limit status (GET)
|
||||
|
||||
### Routing and Execution Core
|
||||
|
||||
- `src/sse/handlers/chat.ts`: request parse, combo handling, account selection loop
|
||||
- `open-sse/handlers/chatCore.ts`: translation, executor dispatch, retry/refresh handling, stream setup
|
||||
- `open-sse/executors/*`: provider-specific network and format behavior
|
||||
|
||||
### Translation Registry and Format Converters
|
||||
|
||||
- `open-sse/translator/index.ts`: translator registry and orchestration
|
||||
- Request translators: `open-sse/translator/request/*`
|
||||
- Response translators: `open-sse/translator/response/*`
|
||||
- Format constants: `open-sse/translator/formats.ts`
|
||||
|
||||
### Persistence
|
||||
|
||||
- `src/lib/db/*`: persistent config/state and domain persistence on SQLite
|
||||
- `src/lib/localDb.ts`: compatibility re-export for DB modules
|
||||
- `src/lib/usageDb.ts`: usage history/call logs facade on top of SQLite tables
|
||||
|
||||
## Provider Executor Coverage (Strategy Pattern)
|
||||
|
||||
Each provider has a specialized executor extending `BaseExecutor` (in `open-sse/executors/base.ts`), which provides URL building, header construction, retry with exponential backoff, credential refresh hooks, and the `execute()` orchestration method.
|
||||
|
||||
| Executor | Provider(s) | Special Handling |
|
||||
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------- |
|
||||
| `DefaultExecutor` | OpenAI, Claude, Gemini, Qwen, iFlow, OpenRouter, GLM, Kimi, MiniMax, DeepSeek, Groq, xAI, Mistral, Perplexity, Together, Fireworks, Cerebras, Cohere, NVIDIA | Dynamic URL/header config per provider |
|
||||
| `AntigravityExecutor` | Google Antigravity | Custom project/session IDs, Retry-After parsing |
|
||||
| `CodexExecutor` | OpenAI Codex | Injects system instructions, forces reasoning effort |
|
||||
| `CursorExecutor` | Cursor IDE | ConnectRPC protocol, Protobuf encoding, request signing via checksum |
|
||||
| `GithubExecutor` | GitHub Copilot | Copilot token refresh, VSCode-mimicking headers |
|
||||
| `KiroExecutor` | AWS CodeWhisperer/Kiro | AWS EventStream binary format → SSE conversion |
|
||||
| `GeminiCLIExecutor` | Gemini CLI | Google OAuth token refresh cycle |
|
||||
|
||||
All other providers (including custom compatible nodes) use the `DefaultExecutor`.
|
||||
|
||||
## Provider Compatibility Matrix
|
||||
|
||||
| Provider | Format | Auth | Stream | Non-Stream | Token Refresh | Usage API |
|
||||
| ---------------- | ---------------- | --------------------- | ---------------- | ---------- | ------------- | ------------------ |
|
||||
| Claude | claude | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Admin only |
|
||||
| Gemini | gemini | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Gemini CLI | gemini-cli | OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Antigravity | antigravity | OAuth | ✅ | ✅ | ✅ | ✅ Full quota API |
|
||||
| OpenAI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Codex | openai-responses | OAuth | ✅ forced | ❌ | ✅ | ✅ Rate limits |
|
||||
| GitHub Copilot | openai | OAuth + Copilot Token | ✅ | ✅ | ✅ | ✅ Quota snapshots |
|
||||
| Cursor | cursor | Custom checksum | ✅ | ✅ | ❌ | ❌ |
|
||||
| Kiro | kiro | AWS SSO OIDC | ✅ (EventStream) | ❌ | ✅ | ✅ Usage limits |
|
||||
| Qwen | openai | OAuth | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| iFlow | openai | OAuth (Basic) | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| OpenRouter | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| GLM/Kimi/MiniMax | claude | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| DeepSeek | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Groq | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| xAI (Grok) | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Mistral | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Perplexity | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Together AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Fireworks AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cerebras | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cohere | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| NVIDIA NIM | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
|
||||
## Format Translation Coverage
|
||||
|
||||
Detected source formats include:
|
||||
|
||||
- `openai`
|
||||
- `openai-responses`
|
||||
- `claude`
|
||||
- `gemini`
|
||||
|
||||
Target formats include:
|
||||
|
||||
- OpenAI chat/Responses
|
||||
- Claude
|
||||
- Gemini/Gemini-CLI/Antigravity envelope
|
||||
- Kiro
|
||||
- Cursor
|
||||
|
||||
Translations use **OpenAI as the hub format** — all conversions go through OpenAI as intermediate:
|
||||
|
||||
```
|
||||
Source Format → OpenAI (hub) → Target Format
|
||||
```
|
||||
|
||||
Translations are selected dynamically based on source payload shape and provider target format.
|
||||
|
||||
Additional processing layers in the translation pipeline:
|
||||
|
||||
- **Response sanitization** — Strips non-standard fields from OpenAI-format responses (both streaming and non-streaming) to ensure strict SDK compliance
|
||||
- **Role normalization** — Converts `developer` → `system` for non-OpenAI targets; merges `system` → `user` for models that reject the system role (GLM, ERNIE)
|
||||
- **Think tag extraction** — Parses `<think>...</think>` blocks from content into `reasoning_content` field
|
||||
- **Structured output** — Converts OpenAI `response_format.json_schema` to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
## Supported API Endpoints
|
||||
|
||||
| Endpoint | Format | Handler |
|
||||
| -------------------------------------------------- | ------------------ | ---------------------------------------------------- |
|
||||
| `POST /v1/chat/completions` | OpenAI Chat | `src/sse/handlers/chat.ts` |
|
||||
| `POST /v1/messages` | Claude Messages | Same handler (auto-detected) |
|
||||
| `POST /v1/responses` | OpenAI Responses | `open-sse/handlers/responsesHandler.ts` |
|
||||
| `POST /v1/embeddings` | OpenAI Embeddings | `open-sse/handlers/embeddings.ts` |
|
||||
| `GET /v1/embeddings` | Model listing | API route |
|
||||
| `POST /v1/images/generations` | OpenAI Images | `open-sse/handlers/imageGeneration.ts` |
|
||||
| `GET /v1/images/generations` | Model listing | API route |
|
||||
| `POST /v1/providers/{provider}/chat/completions` | OpenAI Chat | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/embeddings` | OpenAI Embeddings | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/images/generations` | OpenAI Images | Dedicated per-provider with model validation |
|
||||
| `POST /v1/messages/count_tokens` | Claude Token Count | API route |
|
||||
| `GET /v1/models` | OpenAI Models list | API route (chat + embedding + image + custom models) |
|
||||
| `GET /api/models/catalog` | Catalog | All models grouped by provider + type |
|
||||
| `POST /v1beta/models/*:streamGenerateContent` | Gemini native | API route |
|
||||
| `GET/PUT/DELETE /api/settings/proxy` | Proxy Config | Network proxy configuration |
|
||||
| `POST /api/settings/proxy/test` | Proxy Connectivity | Proxy health/connectivity test endpoint |
|
||||
| `GET/POST/DELETE /api/provider-models` | Custom Models | Custom model management per provider |
|
||||
|
||||
## Bypass Handler
|
||||
|
||||
The bypass handler (`open-sse/utils/bypassHandler.ts`) intercepts known "throwaway" requests from Claude CLI — warmup pings, title extractions, and token counts — and returns a **fake response** without consuming upstream provider tokens. This is triggered only when `User-Agent` contains `claude-cli`.
|
||||
|
||||
## Request Logger Pipeline
|
||||
|
||||
The request logger (`open-sse/utils/requestLogger.ts`) provides a 7-stage debug logging pipeline, disabled by default, enabled via `ENABLE_REQUEST_LOGS=true`:
|
||||
|
||||
```
|
||||
1_req_client.json → 2_req_source.json → 3_req_openai.json → 4_req_target.json
|
||||
→ 5_res_provider.txt → 6_res_openai.txt → 7_res_client.txt
|
||||
```
|
||||
|
||||
Files are written to `<repo>/logs/<session>/` for each request session.
|
||||
|
||||
## Failure Modes and Resilience
|
||||
|
||||
## 1) Account/Provider Availability
|
||||
|
||||
- provider account cooldown on transient/rate/auth errors
|
||||
- account fallback before failing request
|
||||
- combo model fallback when current model/provider path is exhausted
|
||||
|
||||
## 2) Token Expiry
|
||||
|
||||
- pre-check and refresh with retry for refreshable providers
|
||||
- 401/403 retry after refresh attempt in core path
|
||||
|
||||
## 3) Stream Safety
|
||||
|
||||
- disconnect-aware stream controller
|
||||
- translation stream with end-of-stream flush and `[DONE]` handling
|
||||
- usage estimation fallback when provider usage metadata is missing
|
||||
|
||||
## 4) Cloud Sync Degradation
|
||||
|
||||
- sync errors are surfaced but local runtime continues
|
||||
- scheduler has retry-capable logic, but periodic execution currently calls single-attempt sync by default
|
||||
|
||||
## 5) Data Integrity
|
||||
|
||||
- SQLite schema migrations and auto-upgrade hooks at startup
|
||||
- legacy JSON → SQLite migration compatibility path
|
||||
|
||||
## Observability and Operational Signals
|
||||
|
||||
Runtime visibility sources:
|
||||
|
||||
- console logs from `src/sse/utils/logger.ts`
|
||||
- per-request usage aggregates in SQLite (`usage_history`, `call_logs`, `proxy_logs`)
|
||||
- textual request status log in `log.txt` (optional/compat)
|
||||
- optional deep request/translation logs under `logs/` when `ENABLE_REQUEST_LOGS=true`
|
||||
- dashboard usage endpoints (`/api/usage/*`) for UI consumption
|
||||
|
||||
## Security-Sensitive Boundaries
|
||||
|
||||
- JWT secret (`JWT_SECRET`) secures dashboard session cookie verification/signing
|
||||
- Initial password bootstrap (`INITIAL_PASSWORD`) should be explicitly configured for first-run provisioning
|
||||
- API key HMAC secret (`API_KEY_SECRET`) secures generated local API key format
|
||||
- Provider secrets (API keys/tokens) are persisted in local DB and should be protected at filesystem level
|
||||
- Cloud sync endpoints rely on API key auth + machine id semantics
|
||||
|
||||
## Environment and Runtime Matrix
|
||||
|
||||
Environment variables actively used by code:
|
||||
|
||||
- App/auth: `JWT_SECRET`, `INITIAL_PASSWORD`
|
||||
- Storage: `DATA_DIR`
|
||||
- Compatible node behavior: `ALLOW_MULTI_CONNECTIONS_PER_COMPAT_NODE`
|
||||
- Optional storage base override (Linux/macOS when `DATA_DIR` unset): `XDG_CONFIG_HOME`
|
||||
- Security hashing: `API_KEY_SECRET`, `MACHINE_ID_SALT`
|
||||
- Logging: `ENABLE_REQUEST_LOGS`
|
||||
- Sync/cloud URLing: `NEXT_PUBLIC_BASE_URL`, `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Outbound proxy: `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY` and lowercase variants
|
||||
- SOCKS5 feature flags: `ENABLE_SOCKS5_PROXY`, `NEXT_PUBLIC_ENABLE_SOCKS5_PROXY`
|
||||
- Platform/runtime helpers (not app-specific config): `APPDATA`, `NODE_ENV`, `PORT`, `HOSTNAME`
|
||||
|
||||
## Known Architectural Notes
|
||||
|
||||
1. `usageDb` and `localDb` share the same base directory policy (`DATA_DIR` -> `XDG_CONFIG_HOME/omniroute` -> `~/.omniroute`) with legacy file migration.
|
||||
2. `/api/v1/route.ts` delegates to the same unified catalog builder used by `/api/v1/models` (`src/app/api/v1/models/catalog.ts`) to avoid semantic drift.
|
||||
3. Request logger writes full headers/body when enabled; treat log directory as sensitive.
|
||||
4. Cloud behavior depends on correct `NEXT_PUBLIC_BASE_URL` and cloud endpoint reachability.
|
||||
5. The `open-sse/` directory is published as the `@omniroute/open-sse` **npm workspace package**. Source code imports it via `@omniroute/open-sse/...` (resolved by Next.js `transpilePackages`). File paths in this document still use the directory name `open-sse/` for consistency.
|
||||
6. Charts in the dashboard use **Recharts** (SVG-based) for accessible, interactive analytics visualizations (model usage bar charts, provider breakdown tables with success rates).
|
||||
7. E2E tests use **Playwright** (`tests/e2e/`), run via `npm run test:e2e`. Unit tests use **Node.js test runner** (`tests/unit/`), run via `npm run test:unit`. Source code under `src/` is **TypeScript** (`.ts`/`.tsx`); the `open-sse/` workspace remains JavaScript (`.js`).
|
||||
8. Settings page is organized into 5 tabs: Security, Routing (6 global strategies: fill-first, round-robin, p2c, random, least-used, cost-optimized), Resilience (editable rate limits, circuit breaker, policies), AI (thinking budget, system prompt, prompt cache), Advanced (proxy).
|
||||
|
||||
## Operational Verification Checklist
|
||||
|
||||
- Build from source: `npm run build`
|
||||
- Build Docker image: `docker build -t omniroute .`
|
||||
- Start service and verify:
|
||||
- `GET /api/settings`
|
||||
- `GET /api/v1/models`
|
||||
- CLI target base URL should be `http://<host>:20128/v1` when `PORT=20128`
|
||||
@@ -0,0 +1,67 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/AUTO-COMBO.md) · 🇪🇸 [es](../es/AUTO-COMBO.md) · 🇫🇷 [fr](../fr/AUTO-COMBO.md) · 🇩🇪 [de](../de/AUTO-COMBO.md) · 🇮🇹 [it](../it/AUTO-COMBO.md) · 🇷🇺 [ru](../ru/AUTO-COMBO.md) · 🇨🇳 [zh-CN](../zh-CN/AUTO-COMBO.md) · 🇯🇵 [ja](../ja/AUTO-COMBO.md) · 🇰🇷 [ko](../ko/AUTO-COMBO.md) · 🇸🇦 [ar](../ar/AUTO-COMBO.md) · 🇮🇳 [in](../in/AUTO-COMBO.md) · 🇹🇭 [th](../th/AUTO-COMBO.md) · 🇻🇳 [vi](../vi/AUTO-COMBO.md) · 🇮🇩 [id](../id/AUTO-COMBO.md) · 🇲🇾 [ms](../ms/AUTO-COMBO.md) · 🇳🇱 [nl](../nl/AUTO-COMBO.md) · 🇵🇱 [pl](../pl/AUTO-COMBO.md) · 🇸🇪 [sv](../sv/AUTO-COMBO.md) · 🇳🇴 [no](../no/AUTO-COMBO.md) · 🇩🇰 [da](../da/AUTO-COMBO.md) · 🇫🇮 [fi](../fi/AUTO-COMBO.md) · 🇵🇹 [pt](../pt/AUTO-COMBO.md) · 🇷🇴 [ro](../ro/AUTO-COMBO.md) · 🇭🇺 [hu](../hu/AUTO-COMBO.md) · 🇧🇬 [bg](../bg/AUTO-COMBO.md) · 🇸🇰 [sk](../sk/AUTO-COMBO.md) · 🇺🇦 [uk-UA](../uk-UA/AUTO-COMBO.md) · 🇮🇱 [he](../he/AUTO-COMBO.md) · 🇵🇭 [phi](../phi/AUTO-COMBO.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Auto-Combo Engine
|
||||
|
||||
> Self-managing model chains with adaptive scoring
|
||||
|
||||
## How It Works
|
||||
|
||||
The Auto-Combo Engine dynamically selects the best provider/model for each request using a **6-factor scoring function**:
|
||||
|
||||
| Factor | Weight | Description |
|
||||
| :--------- | :----- | :---------------------------------------------- |
|
||||
| Quota | 0.20 | Remaining capacity [0..1] |
|
||||
| Health | 0.25 | Circuit breaker: CLOSED=1.0, HALF=0.5, OPEN=0.0 |
|
||||
| CostInv | 0.20 | Inverse cost (cheaper = higher score) |
|
||||
| LatencyInv | 0.15 | Inverse p95 latency (faster = higher) |
|
||||
| TaskFit | 0.10 | Model × task type fitness score |
|
||||
| Stability | 0.10 | Low variance in latency/errors |
|
||||
|
||||
## Mode Packs
|
||||
|
||||
| Pack | Focus | Key Weight |
|
||||
| :---------------------- | :----------- | :--------------- |
|
||||
| 🚀 **Ship Fast** | Speed | latencyInv: 0.35 |
|
||||
| 💰 **Cost Saver** | Economy | costInv: 0.40 |
|
||||
| 🎯 **Quality First** | Best model | taskFit: 0.40 |
|
||||
| 📡 **Offline Friendly** | Availability | quota: 0.40 |
|
||||
|
||||
## Self-Healing
|
||||
|
||||
- **Temporary exclusion**: Score < 0.2 → excluded for 5 min (progressive backoff, max 30 min)
|
||||
- **Circuit breaker awareness**: OPEN → auto-excluded; HALF_OPEN → probe requests
|
||||
- **Incident mode**: >50% OPEN → disable exploration, maximize stability
|
||||
- **Cooldown recovery**: After exclusion, first request is a "probe" with reduced timeout
|
||||
|
||||
## Bandit Exploration
|
||||
|
||||
5% of requests (configurable) are routed to random providers for exploration. Disabled in incident mode.
|
||||
|
||||
## API
|
||||
|
||||
```bash
|
||||
# Create auto-combo
|
||||
curl -X POST http://localhost:20128/api/combos/auto \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"id":"my-auto","name":"Auto Coder","candidatePool":["anthropic","google","openai"],"modePack":"ship-fast"}'
|
||||
|
||||
# List auto-combos
|
||||
curl http://localhost:20128/api/combos/auto
|
||||
```
|
||||
|
||||
## Task Fitness
|
||||
|
||||
30+ models scored across 6 task types (`coding`, `review`, `planning`, `analysis`, `debugging`, `documentation`). Supports wildcard patterns (e.g., `*-coder` → high coding score).
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------ |
|
||||
| `open-sse/services/autoCombo/scoring.ts` | Scoring function & pool normalization |
|
||||
| `open-sse/services/autoCombo/taskFitness.ts` | Model × task fitness lookup |
|
||||
| `open-sse/services/autoCombo/engine.ts` | Selection logic, bandit, budget cap |
|
||||
| `open-sse/services/autoCombo/selfHealing.ts` | Exclusion, probes, incident mode |
|
||||
| `open-sse/services/autoCombo/modePacks.ts` | 4 weight profiles |
|
||||
| `src/app/api/combos/auto/route.ts` | REST API |
|
||||
@@ -0,0 +1,593 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/CODEBASE_DOCUMENTATION.md) · 🇪🇸 [es](../es/CODEBASE_DOCUMENTATION.md) · 🇫🇷 [fr](../fr/CODEBASE_DOCUMENTATION.md) · 🇩🇪 [de](../de/CODEBASE_DOCUMENTATION.md) · 🇮🇹 [it](../it/CODEBASE_DOCUMENTATION.md) · 🇷🇺 [ru](../ru/CODEBASE_DOCUMENTATION.md) · 🇨🇳 [zh-CN](../zh-CN/CODEBASE_DOCUMENTATION.md) · 🇯🇵 [ja](../ja/CODEBASE_DOCUMENTATION.md) · 🇰🇷 [ko](../ko/CODEBASE_DOCUMENTATION.md) · 🇸🇦 [ar](../ar/CODEBASE_DOCUMENTATION.md) · 🇮🇳 [in](../in/CODEBASE_DOCUMENTATION.md) · 🇹🇭 [th](../th/CODEBASE_DOCUMENTATION.md) · 🇻🇳 [vi](../vi/CODEBASE_DOCUMENTATION.md) · 🇮🇩 [id](../id/CODEBASE_DOCUMENTATION.md) · 🇲🇾 [ms](../ms/CODEBASE_DOCUMENTATION.md) · 🇳🇱 [nl](../nl/CODEBASE_DOCUMENTATION.md) · 🇵🇱 [pl](../pl/CODEBASE_DOCUMENTATION.md) · 🇸🇪 [sv](../sv/CODEBASE_DOCUMENTATION.md) · 🇳🇴 [no](../no/CODEBASE_DOCUMENTATION.md) · 🇩🇰 [da](../da/CODEBASE_DOCUMENTATION.md) · 🇫🇮 [fi](../fi/CODEBASE_DOCUMENTATION.md) · 🇵🇹 [pt](../pt/CODEBASE_DOCUMENTATION.md) · 🇷🇴 [ro](../ro/CODEBASE_DOCUMENTATION.md) · 🇭🇺 [hu](../hu/CODEBASE_DOCUMENTATION.md) · 🇧🇬 [bg](../bg/CODEBASE_DOCUMENTATION.md) · 🇸🇰 [sk](../sk/CODEBASE_DOCUMENTATION.md) · 🇺🇦 [uk-UA](../uk-UA/CODEBASE_DOCUMENTATION.md) · 🇮🇱 [he](../he/CODEBASE_DOCUMENTATION.md) · 🇵🇭 [phi](../phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
---
|
||||
|
||||
# omniroute — Codebase Documentation
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](CODEBASE_DOCUMENTATION.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/CODEBASE_DOCUMENTATION.md) | 🇪🇸 [Español](i18n/es/CODEBASE_DOCUMENTATION.md) | 🇫🇷 [Français](i18n/fr/CODEBASE_DOCUMENTATION.md) | 🇮🇹 [Italiano](i18n/it/CODEBASE_DOCUMENTATION.md) | 🇷🇺 [Русский](i18n/ru/CODEBASE_DOCUMENTATION.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/CODEBASE_DOCUMENTATION.md) | 🇩🇪 [Deutsch](i18n/de/CODEBASE_DOCUMENTATION.md) | 🇮🇳 [हिन्दी](i18n/in/CODEBASE_DOCUMENTATION.md) | 🇹🇭 [ไทย](i18n/th/CODEBASE_DOCUMENTATION.md) | 🇺🇦 [Українська](i18n/uk-UA/CODEBASE_DOCUMENTATION.md) | 🇸🇦 [العربية](i18n/ar/CODEBASE_DOCUMENTATION.md) | 🇯🇵 [日本語](i18n/ja/CODEBASE_DOCUMENTATION.md) | 🇻🇳 [Tiếng Việt](i18n/vi/CODEBASE_DOCUMENTATION.md) | 🇧🇬 [Български](i18n/bg/CODEBASE_DOCUMENTATION.md) | 🇩🇰 [Dansk](i18n/da/CODEBASE_DOCUMENTATION.md) | 🇫🇮 [Suomi](i18n/fi/CODEBASE_DOCUMENTATION.md) | 🇮🇱 [עברית](i18n/he/CODEBASE_DOCUMENTATION.md) | 🇭🇺 [Magyar](i18n/hu/CODEBASE_DOCUMENTATION.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/CODEBASE_DOCUMENTATION.md) | 🇰🇷 [한국어](i18n/ko/CODEBASE_DOCUMENTATION.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/CODEBASE_DOCUMENTATION.md) | 🇳🇱 [Nederlands](i18n/nl/CODEBASE_DOCUMENTATION.md) | 🇳🇴 [Norsk](i18n/no/CODEBASE_DOCUMENTATION.md) | 🇵🇹 [Português (Portugal)](i18n/pt/CODEBASE_DOCUMENTATION.md) | 🇷🇴 [Română](i18n/ro/CODEBASE_DOCUMENTATION.md) | 🇵🇱 [Polski](i18n/pl/CODEBASE_DOCUMENTATION.md) | 🇸🇰 [Slovenčina](i18n/sk/CODEBASE_DOCUMENTATION.md) | 🇸🇪 [Svenska](i18n/sv/CODEBASE_DOCUMENTATION.md) | 🇵🇭 [Filipino](i18n/phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
> A comprehensive, beginner-friendly guide to the **omniroute** multi-provider AI proxy router.
|
||||
|
||||
---
|
||||
|
||||
## 1. What Is omniroute?
|
||||
|
||||
omniroute is a **proxy router** that sits between AI clients (Claude CLI, Codex, Cursor IDE, etc.) and AI providers (Anthropic, Google, OpenAI, AWS, GitHub, etc.). It solves one big problem:
|
||||
|
||||
> **Different AI clients speak different "languages" (API formats), and different AI providers expect different "languages" too.** omniroute translates between them automatically.
|
||||
|
||||
Think of it like a universal translator at the United Nations — any delegate can speak any language, and the translator converts it for any other delegate.
|
||||
|
||||
---
|
||||
|
||||
## 2. Architecture Overview
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph Clients
|
||||
A[Claude CLI]
|
||||
B[Codex]
|
||||
C[Cursor IDE]
|
||||
D[OpenAI-compatible]
|
||||
end
|
||||
|
||||
subgraph omniroute
|
||||
E[Handler Layer]
|
||||
F[Translator Layer]
|
||||
G[Executor Layer]
|
||||
H[Services Layer]
|
||||
end
|
||||
|
||||
subgraph Providers
|
||||
I[Anthropic Claude]
|
||||
J[Google Gemini]
|
||||
K[OpenAI / Codex]
|
||||
L[GitHub Copilot]
|
||||
M[AWS Kiro]
|
||||
N[Antigravity]
|
||||
O[Cursor API]
|
||||
end
|
||||
|
||||
A --> E
|
||||
B --> E
|
||||
C --> E
|
||||
D --> E
|
||||
E --> F
|
||||
F --> G
|
||||
G --> I
|
||||
G --> J
|
||||
G --> K
|
||||
G --> L
|
||||
G --> M
|
||||
G --> N
|
||||
G --> O
|
||||
H -.-> E
|
||||
H -.-> G
|
||||
```
|
||||
|
||||
### Core Principle: Hub-and-Spoke Translation
|
||||
|
||||
All format translation passes through **OpenAI format as the hub**:
|
||||
|
||||
```
|
||||
Client Format → [OpenAI Hub] → Provider Format (request)
|
||||
Provider Format → [OpenAI Hub] → Client Format (response)
|
||||
```
|
||||
|
||||
This means you only need **N translators** (one per format) instead of **N²** (every pair).
|
||||
|
||||
---
|
||||
|
||||
## 3. Project Structure
|
||||
|
||||
```
|
||||
omniroute/
|
||||
├── open-sse/ ← Core proxy library (portable, framework-agnostic)
|
||||
│ ├── index.js ← Main entry point, exports everything
|
||||
│ ├── config/ ← Configuration & constants
|
||||
│ ├── executors/ ← Provider-specific request execution
|
||||
│ ├── handlers/ ← Request handling orchestration
|
||||
│ ├── services/ ← Business logic (auth, models, fallback, usage)
|
||||
│ ├── translator/ ← Format translation engine
|
||||
│ │ ├── request/ ← Request translators (8 files)
|
||||
│ │ ├── response/ ← Response translators (7 files)
|
||||
│ │ └── helpers/ ← Shared translation utilities (6 files)
|
||||
│ └── utils/ ← Utility functions
|
||||
├── src/ ← Application layer (Express/Worker runtime)
|
||||
│ ├── app/ ← Web UI, API routes, middleware
|
||||
│ ├── lib/ ← Database, auth, and shared library code
|
||||
│ ├── mitm/ ← Man-in-the-middle proxy utilities
|
||||
│ ├── models/ ← Database models
|
||||
│ ├── shared/ ← Shared utilities (wrappers around open-sse)
|
||||
│ ├── sse/ ← SSE endpoint handlers
|
||||
│ └── store/ ← State management
|
||||
├── data/ ← Runtime data (credentials, logs)
|
||||
│ └── provider-credentials.json (external credentials override, gitignored)
|
||||
└── tester/ ← Test utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Module-by-Module Breakdown
|
||||
|
||||
### 4.1 Config (`open-sse/config/`)
|
||||
|
||||
The **single source of truth** for all provider configuration.
|
||||
|
||||
| File | Purpose |
|
||||
| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `constants.ts` | `PROVIDERS` object with base URLs, OAuth credentials (defaults), headers, and default system prompts for every provider. Also defines `HTTP_STATUS`, `ERROR_TYPES`, `COOLDOWN_MS`, `BACKOFF_CONFIG`, and `SKIP_PATTERNS`. |
|
||||
| `credentialLoader.ts` | Loads external credentials from `data/provider-credentials.json` and merges them over the hardcoded defaults in `PROVIDERS`. Keeps secrets out of source control while maintaining backwards compatibility. |
|
||||
| `providerModels.ts` | Central model registry: maps provider aliases → model IDs. Functions like `getModels()`, `getProviderByAlias()`. |
|
||||
| `codexInstructions.ts` | System instructions injected into Codex requests (editing constraints, sandbox rules, approval policies). |
|
||||
| `defaultThinkingSignature.ts` | Default "thinking" signatures for Claude and Gemini models. |
|
||||
| `ollamaModels.ts` | Schema definition for local Ollama models (name, size, family, quantization). |
|
||||
|
||||
#### Credential Loading Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["App starts"] --> B["constants.ts defines PROVIDERS\nwith hardcoded defaults"]
|
||||
B --> C{"data/provider-credentials.json\nexists?"}
|
||||
C -->|Yes| D["credentialLoader reads JSON"]
|
||||
C -->|No| E["Use hardcoded defaults"]
|
||||
D --> F{"For each provider in JSON"}
|
||||
F --> G{"Provider exists\nin PROVIDERS?"}
|
||||
G -->|No| H["Log warning, skip"]
|
||||
G -->|Yes| I{"Value is object?"}
|
||||
I -->|No| J["Log warning, skip"]
|
||||
I -->|Yes| K["Merge clientId, clientSecret,\ntokenUrl, authUrl, refreshUrl"]
|
||||
K --> F
|
||||
H --> F
|
||||
J --> F
|
||||
F -->|Done| L["PROVIDERS ready with\nmerged credentials"]
|
||||
E --> L
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 Executors (`open-sse/executors/`)
|
||||
|
||||
Executors encapsulate **provider-specific logic** using the **Strategy Pattern**. Each executor overrides base methods as needed.
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BaseExecutor {
|
||||
+buildUrl(model, stream, options)
|
||||
+buildHeaders(credentials, stream, body)
|
||||
+transformRequest(body, model, stream, credentials)
|
||||
+execute(url, options)
|
||||
+shouldRetry(status, error)
|
||||
+refreshCredentials(credentials, log)
|
||||
}
|
||||
|
||||
class DefaultExecutor {
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class AntigravityExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+shouldRetry()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class CursorExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseResponse()
|
||||
+generateChecksum()
|
||||
}
|
||||
|
||||
class KiroExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseEventStream()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
BaseExecutor <|-- DefaultExecutor
|
||||
BaseExecutor <|-- AntigravityExecutor
|
||||
BaseExecutor <|-- CursorExecutor
|
||||
BaseExecutor <|-- KiroExecutor
|
||||
BaseExecutor <|-- CodexExecutor
|
||||
BaseExecutor <|-- GeminiCLIExecutor
|
||||
BaseExecutor <|-- GithubExecutor
|
||||
```
|
||||
|
||||
| Executor | Provider | Key Specializations |
|
||||
| ---------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base.ts` | — | Abstract base: URL building, headers, retry logic, credential refresh |
|
||||
| `default.ts` | Claude, Gemini, OpenAI, GLM, Kimi, MiniMax | Generic OAuth token refresh for standard providers |
|
||||
| `antigravity.ts` | Google Cloud Code | Project/session ID generation, multi-URL fallback, custom retry parsing from error messages ("reset after 2h7m23s") |
|
||||
| `cursor.ts` | Cursor IDE | **Most complex**: SHA-256 checksum auth, Protobuf request encoding, binary EventStream → SSE response parsing |
|
||||
| `codex.ts` | OpenAI Codex | Injects system instructions, manages thinking levels, removes unsupported parameters |
|
||||
| `gemini-cli.ts` | Google Gemini CLI | Custom URL building (`streamGenerateContent`), Google OAuth token refresh |
|
||||
| `github.ts` | GitHub Copilot | Dual token system (GitHub OAuth + Copilot token), VSCode header mimicking |
|
||||
| `kiro.ts` | AWS CodeWhisperer | AWS EventStream binary parsing, AMZN event frames, token estimation |
|
||||
| `index.ts` | — | Factory: maps provider name → executor class, with default fallback |
|
||||
|
||||
---
|
||||
|
||||
### 4.3 Handlers (`open-sse/handlers/`)
|
||||
|
||||
The **orchestration layer** — coordinates translation, execution, streaming, and error handling.
|
||||
|
||||
| File | Purpose |
|
||||
| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `chatCore.ts` | **Central orchestrator** (~600 lines). Handles the complete request lifecycle: format detection → translation → executor dispatch → streaming/non-streaming response → token refresh → error handling → usage logging. |
|
||||
| `responsesHandler.ts` | Adapter for OpenAI's Responses API: converts Responses format → Chat Completions → sends to `chatCore` → converts SSE back to Responses format. |
|
||||
| `embeddings.ts` | Embedding generation handler: resolves embedding model → provider, dispatches to provider API, returns OpenAI-compatible embedding response. Supports 6+ providers. |
|
||||
| `imageGeneration.ts` | Image generation handler: resolves image model → provider, supports OpenAI-compatible, Gemini-image (Antigravity), and fallback (Nebius) modes. Returns base64 or URL images. |
|
||||
|
||||
#### Request Lifecycle (chatCore.ts)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant chatCore
|
||||
participant Translator
|
||||
participant Executor
|
||||
participant Provider
|
||||
|
||||
Client->>chatCore: Request (any format)
|
||||
chatCore->>chatCore: Detect source format
|
||||
chatCore->>chatCore: Check bypass patterns
|
||||
chatCore->>chatCore: Resolve model & provider
|
||||
chatCore->>Translator: Translate request (source → OpenAI → target)
|
||||
chatCore->>Executor: Get executor for provider
|
||||
Executor->>Executor: Build URL, headers, transform request
|
||||
Executor->>Executor: Refresh credentials if needed
|
||||
Executor->>Provider: HTTP fetch (streaming or non-streaming)
|
||||
|
||||
alt Streaming
|
||||
Provider-->>chatCore: SSE stream
|
||||
chatCore->>chatCore: Pipe through SSE transform stream
|
||||
Note over chatCore: Transform stream translates<br/>each chunk: target → OpenAI → source
|
||||
chatCore-->>Client: Translated SSE stream
|
||||
else Non-streaming
|
||||
Provider-->>chatCore: JSON response
|
||||
chatCore->>Translator: Translate response
|
||||
chatCore-->>Client: Translated JSON
|
||||
end
|
||||
|
||||
alt Error (401, 429, 500...)
|
||||
chatCore->>Executor: Retry with credential refresh
|
||||
chatCore->>chatCore: Account fallback logic
|
||||
end
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.4 Services (`open-sse/services/`)
|
||||
|
||||
Business logic that supports the handlers and executors.
|
||||
|
||||
| File | Purpose |
|
||||
| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `provider.ts` | **Format detection** (`detectFormat`): analyzes request body structure to identify Claude/OpenAI/Gemini/Antigravity/Responses formats (includes `max_tokens` heuristic for Claude). Also: URL building, header building, thinking config normalization. Supports `openai-compatible-*` and `anthropic-compatible-*` dynamic providers. |
|
||||
| `model.ts` | Model string parsing (`claude/model-name` → `{provider: "claude", model: "model-name"}`), alias resolution with collision detection, input sanitization (rejects path traversal/control chars), and model info resolution with async alias getter support. |
|
||||
| `accountFallback.ts` | Rate-limit handling: exponential backoff (1s → 2s → 4s → max 2min), account cooldown management, error classification (which errors trigger fallback vs. not). |
|
||||
| `tokenRefresh.ts` | OAuth token refresh for **every provider**: Google (Gemini, Antigravity), Claude, Codex, Qwen, iFlow, GitHub (OAuth + Copilot dual-token), Kiro (AWS SSO OIDC + Social Auth). Includes in-flight promise deduplication cache and retry with exponential backoff. |
|
||||
| `combo.ts` | **Combo models**: chains of fallback models. If model A fails with a fallback-eligible error, try model B, then C, etc. Returns actual upstream status codes. |
|
||||
| `usage.ts` | Fetches quota/usage data from provider APIs (GitHub Copilot quotas, Antigravity model quotas, Codex rate limits, Kiro usage breakdowns, Claude settings). |
|
||||
| `accountSelector.ts` | Smart account selection with scoring algorithm: considers priority, health status, round-robin position, and cooldown state to pick the optimal account for each request. |
|
||||
| `contextManager.ts` | Request context lifecycle management: creates and tracks per-request context objects with metadata (request ID, timestamps, provider info) for debugging and logging. |
|
||||
| `ipFilter.ts` | IP-based access control: supports allowlist and blocklist modes. Validates client IP against configured rules before processing API requests. |
|
||||
| `sessionManager.ts` | Session tracking with client fingerprinting: tracks active sessions using hashed client identifiers, monitors request counts, and provides session metrics. |
|
||||
| `signatureCache.ts` | Request signature-based deduplication cache: prevents duplicate requests by caching recent request signatures and returning cached responses for identical requests within a time window. |
|
||||
| `systemPrompt.ts` | Global system prompt injection: prepends or appends a configurable system prompt to all requests, with per-provider compatibility handling. |
|
||||
| `thinkingBudget.ts` | Reasoning token budget management: supports passthrough, auto (strip thinking config), custom (fixed budget), and adaptive (complexity-scaled) modes for controlling thinking/reasoning tokens. |
|
||||
| `wildcardRouter.ts` | Wildcard model pattern routing: resolves wildcard patterns (e.g., `*/claude-*`) to concrete provider/model pairs based on availability and priority. |
|
||||
|
||||
#### Token Refresh Deduplication
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant R1 as Request 1
|
||||
participant R2 as Request 2
|
||||
participant Cache as refreshPromiseCache
|
||||
participant OAuth as OAuth Provider
|
||||
|
||||
R1->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: No in-flight promise
|
||||
Cache->>OAuth: Start refresh
|
||||
R2->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: Found in-flight promise
|
||||
Cache-->>R2: Return existing promise
|
||||
OAuth-->>Cache: New access token
|
||||
Cache-->>R1: New access token
|
||||
Cache-->>R2: Same access token (shared)
|
||||
Cache->>Cache: Delete cache entry
|
||||
```
|
||||
|
||||
#### Account Fallback State Machine
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Active
|
||||
Active --> Error: Request fails (401/429/500)
|
||||
Error --> Cooldown: Apply backoff
|
||||
Cooldown --> Active: Cooldown expires
|
||||
Active --> Active: Request succeeds (reset backoff)
|
||||
|
||||
state Error {
|
||||
[*] --> ClassifyError
|
||||
ClassifyError --> ShouldFallback: Rate limit / Auth / Transient
|
||||
ClassifyError --> NoFallback: 400 Bad Request
|
||||
}
|
||||
|
||||
state Cooldown {
|
||||
[*] --> ExponentialBackoff
|
||||
ExponentialBackoff: Level 0 = 1s
|
||||
ExponentialBackoff: Level 1 = 2s
|
||||
ExponentialBackoff: Level 2 = 4s
|
||||
ExponentialBackoff: Max = 2min
|
||||
}
|
||||
```
|
||||
|
||||
#### Combo Model Chain
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Request with\ncombo model"] --> B["Model A"]
|
||||
B -->|"2xx Success"| C["Return response"]
|
||||
B -->|"429/401/500"| D{"Fallback\neligible?"}
|
||||
D -->|Yes| E["Model B"]
|
||||
D -->|No| F["Return error"]
|
||||
E -->|"2xx Success"| C
|
||||
E -->|"429/401/500"| G{"Fallback\neligible?"}
|
||||
G -->|Yes| H["Model C"]
|
||||
G -->|No| F
|
||||
H -->|"2xx Success"| C
|
||||
H -->|"Fail"| I["All failed →\nReturn last status"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.5 Translator (`open-sse/translator/`)
|
||||
|
||||
The **format translation engine** using a self-registering plugin system.
|
||||
|
||||
#### Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "Request Translation"
|
||||
A["Claude → OpenAI"]
|
||||
B["Gemini → OpenAI"]
|
||||
C["Antigravity → OpenAI"]
|
||||
D["OpenAI Responses → OpenAI"]
|
||||
E["OpenAI → Claude"]
|
||||
F["OpenAI → Gemini"]
|
||||
G["OpenAI → Kiro"]
|
||||
H["OpenAI → Cursor"]
|
||||
end
|
||||
|
||||
subgraph "Response Translation"
|
||||
I["Claude → OpenAI"]
|
||||
J["Gemini → OpenAI"]
|
||||
K["Kiro → OpenAI"]
|
||||
L["Cursor → OpenAI"]
|
||||
M["OpenAI → Claude"]
|
||||
N["OpenAI → Antigravity"]
|
||||
O["OpenAI → Responses"]
|
||||
end
|
||||
```
|
||||
|
||||
| Directory | Files | Description |
|
||||
| ------------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `request/` | 8 translators | Convert request bodies between formats. Each file self-registers via `register(from, to, fn)` on import. |
|
||||
| `response/` | 7 translators | Convert streaming response chunks between formats. Handles SSE event types, thinking blocks, tool calls. |
|
||||
| `helpers/` | 6 helpers | Shared utilities: `claudeHelper` (system prompt extraction, thinking config), `geminiHelper` (parts/contents mapping), `openaiHelper` (format filtering), `toolCallHelper` (ID generation, missing response injection), `maxTokensHelper`, `responsesApiHelper`. |
|
||||
| `index.ts` | — | Translation engine: `translateRequest()`, `translateResponse()`, state management, registry. |
|
||||
| `formats.ts` | — | Format constants: `OPENAI`, `CLAUDE`, `GEMINI`, `ANTIGRAVITY`, `KIRO`, `CURSOR`, `OPENAI_RESPONSES`. |
|
||||
|
||||
#### Key Design: Self-Registering Plugins
|
||||
|
||||
```javascript
|
||||
// Each translator file calls register() on import:
|
||||
import { register } from "../index.js";
|
||||
register("claude", "openai", translateClaudeToOpenAI);
|
||||
|
||||
// The index.js imports all translator files, triggering registration:
|
||||
import "./request/claude-to-openai.js"; // ← self-registers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.6 Utils (`open-sse/utils/`)
|
||||
|
||||
| File | Purpose |
|
||||
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `error.ts` | Error response building (OpenAI-compatible format), upstream error parsing, Antigravity retry-time extraction from error messages, SSE error streaming. |
|
||||
| `stream.ts` | **SSE Transform Stream** — the core streaming pipeline. Two modes: `TRANSLATE` (full format translation) and `PASSTHROUGH` (normalize + extract usage). Handles chunk buffering, usage estimation, content length tracking. Per-stream encoder/decoder instances avoid shared state. |
|
||||
| `streamHelpers.ts` | Low-level SSE utilities: `parseSSELine` (whitespace-tolerant), `hasValuableContent` (filters empty chunks for OpenAI/Claude/Gemini), `fixInvalidId`, `formatSSE` (format-aware SSE serialization with `perf_metrics` cleanup). |
|
||||
| `usageTracking.ts` | Token usage extraction from any format (Claude/OpenAI/Gemini/Responses), estimation with separate tool/message char-per-token ratios, buffer addition (2000 tokens safety margin), format-specific field filtering, console logging with ANSI colors. |
|
||||
| `requestLogger.ts` | File-based request logging (opt-in via `ENABLE_REQUEST_LOGS=true`). Creates session folders with numbered files: `1_req_client.json` → `7_res_client.txt`. All I/O is async (fire-and-forget). Masks sensitive headers. |
|
||||
| `bypassHandler.ts` | Intercepts specific patterns from Claude CLI (title extraction, warmup, count) and returns fake responses without calling any provider. Supports both streaming and non-streaming. Intentionally limited to Claude CLI scope. |
|
||||
| `networkProxy.ts` | Resolves outbound proxy URL for a given provider with precedence: provider-specific config → global config → environment variables (`HTTPS_PROXY`/`HTTP_PROXY`/`ALL_PROXY`). Supports `NO_PROXY` exclusions. Caches config for 30s. |
|
||||
|
||||
#### SSE Streaming Pipeline
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["Provider SSE stream"] --> B["TextDecoder\n(per-stream instance)"]
|
||||
B --> C["Buffer lines\n(split on newline)"]
|
||||
C --> D["parseSSELine()\n(trim whitespace, parse JSON)"]
|
||||
D --> E{"Mode?"}
|
||||
E -->|TRANSLATE| F["translateResponse()\ntarget → OpenAI → source"]
|
||||
E -->|PASSTHROUGH| G["fixInvalidId()\nnormalize chunk"]
|
||||
F --> H["hasValuableContent()\nfilter empty chunks"]
|
||||
G --> H
|
||||
H -->|"Has content"| I["extractUsage()\ntrack token counts"]
|
||||
H -->|"Empty"| J["Skip chunk"]
|
||||
I --> K["formatSSE()\nserialize + clean perf_metrics"]
|
||||
K --> L["TextEncoder\n(per-stream instance)"]
|
||||
L --> M["Enqueue to\nclient stream"]
|
||||
|
||||
style A fill:#f9f,stroke:#333
|
||||
style M fill:#9f9,stroke:#333
|
||||
```
|
||||
|
||||
#### Request Logger Session Structure
|
||||
|
||||
```
|
||||
logs/
|
||||
└── claude_gemini_claude-sonnet_20260208_143045/
|
||||
├── 1_req_client.json ← Raw client request
|
||||
├── 2_req_source.json ← After initial conversion
|
||||
├── 3_req_openai.json ← OpenAI intermediate format
|
||||
├── 4_req_target.json ← Final target format
|
||||
├── 5_res_provider.txt ← Provider SSE chunks (streaming)
|
||||
├── 5_res_provider.json ← Provider response (non-streaming)
|
||||
├── 6_res_openai.txt ← OpenAI intermediate chunks
|
||||
├── 7_res_client.txt ← Client-facing SSE chunks
|
||||
└── 6_error.json ← Error details (if any)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.7 Application Layer (`src/`)
|
||||
|
||||
| Directory | Purpose |
|
||||
| ------------- | ---------------------------------------------------------------------- |
|
||||
| `src/app/` | Web UI, API routes, Express middleware, OAuth callback handlers |
|
||||
| `src/lib/` | Database access (`localDb.ts`, `usageDb.ts`), authentication, shared |
|
||||
| `src/mitm/` | Man-in-the-middle proxy utilities for intercepting provider traffic |
|
||||
| `src/models/` | Database model definitions |
|
||||
| `src/shared/` | Wrappers around open-sse functions (provider, stream, error, etc.) |
|
||||
| `src/sse/` | SSE endpoint handlers that wire the open-sse library to Express routes |
|
||||
| `src/store/` | Application state management |
|
||||
|
||||
#### Notable API Routes
|
||||
|
||||
| Route | Methods | Purpose |
|
||||
| --------------------------------------------- | --------------- | ------------------------------------------------------------------------------------- |
|
||||
| `/api/provider-models` | GET/POST/DELETE | CRUD for custom models per provider |
|
||||
| `/api/models/catalog` | GET | Aggregated catalog of all models (chat, embedding, image, custom) grouped by provider |
|
||||
| `/api/settings/proxy` | GET/PUT/DELETE | Hierarchical outbound proxy configuration (`global/providers/combos/keys`) |
|
||||
| `/api/settings/proxy/test` | POST | Validates proxy connectivity and returns public IP/latency |
|
||||
| `/v1/providers/[provider]/chat/completions` | POST | Dedicated per-provider chat completions with model validation |
|
||||
| `/v1/providers/[provider]/embeddings` | POST | Dedicated per-provider embeddings with model validation |
|
||||
| `/v1/providers/[provider]/images/generations` | POST | Dedicated per-provider image generation with model validation |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist management |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget configuration (passthrough/auto/custom/adaptive) |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt injection for all requests |
|
||||
| `/api/sessions` | GET | Active session tracking and metrics |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
|
||||
---
|
||||
|
||||
## 5. Key Design Patterns
|
||||
|
||||
### 5.1 Hub-and-Spoke Translation
|
||||
|
||||
All formats translate through **OpenAI format as the hub**. Adding a new provider only requires writing **one pair** of translators (to/from OpenAI), not N pairs.
|
||||
|
||||
### 5.2 Executor Strategy Pattern
|
||||
|
||||
Each provider has a dedicated executor class inheriting from `BaseExecutor`. The factory in `executors/index.ts` selects the right one at runtime.
|
||||
|
||||
### 5.3 Self-Registering Plugin System
|
||||
|
||||
Translator modules register themselves on import via `register()`. Adding a new translator is just creating a file and importing it.
|
||||
|
||||
### 5.4 Account Fallback with Exponential Backoff
|
||||
|
||||
When a provider returns 429/401/500, the system can switch to the next account, applying exponential cooldowns (1s → 2s → 4s → max 2min).
|
||||
|
||||
### 5.5 Combo Model Chains
|
||||
|
||||
A "combo" groups multiple `provider/model` strings. If the first fails, fallback to the next automatically.
|
||||
|
||||
### 5.6 Stateful Streaming Translation
|
||||
|
||||
Response translation maintains state across SSE chunks (thinking block tracking, tool call accumulation, content block indexing) via the `initState()` mechanism.
|
||||
|
||||
### 5.7 Usage Safety Buffer
|
||||
|
||||
A 2000-token buffer is added to reported usage to prevent clients from hitting context window limits due to overhead from system prompts and format translation.
|
||||
|
||||
---
|
||||
|
||||
## 6. Supported Formats
|
||||
|
||||
| Format | Direction | Identifier |
|
||||
| ----------------------- | --------------- | ------------------ |
|
||||
| OpenAI Chat Completions | source + target | `openai` |
|
||||
| OpenAI Responses API | source + target | `openai-responses` |
|
||||
| Anthropic Claude | source + target | `claude` |
|
||||
| Google Gemini | source + target | `gemini` |
|
||||
| Google Gemini CLI | target only | `gemini-cli` |
|
||||
| Antigravity | source + target | `antigravity` |
|
||||
| AWS Kiro | target only | `kiro` |
|
||||
| Cursor | target only | `cursor` |
|
||||
|
||||
---
|
||||
|
||||
## 7. Supported Providers
|
||||
|
||||
| Provider | Auth Method | Executor | Key Notes |
|
||||
| ------------------------ | ---------------------- | ----------- | --------------------------------------------- |
|
||||
| Anthropic Claude | API key or OAuth | Default | Uses `x-api-key` header |
|
||||
| Google Gemini | API key or OAuth | Default | Uses `x-goog-api-key` header |
|
||||
| Google Gemini CLI | OAuth | GeminiCLI | Uses `streamGenerateContent` endpoint |
|
||||
| Antigravity | OAuth | Antigravity | Multi-URL fallback, custom retry parsing |
|
||||
| OpenAI | API key | Default | Standard Bearer auth |
|
||||
| Codex | OAuth | Codex | Injects system instructions, manages thinking |
|
||||
| GitHub Copilot | OAuth + Copilot token | Github | Dual token, VSCode header mimicking |
|
||||
| Kiro (AWS) | AWS SSO OIDC or Social | Kiro | Binary EventStream parsing |
|
||||
| Cursor IDE | Checksum auth | Cursor | Protobuf encoding, SHA-256 checksums |
|
||||
| Qwen | OAuth | Default | Standard auth |
|
||||
| iFlow | OAuth (Basic + Bearer) | Default | Dual auth header |
|
||||
| OpenRouter | API key | Default | Standard Bearer auth |
|
||||
| GLM, Kimi, MiniMax | API key | Default | Claude-compatible, use `x-api-key` |
|
||||
| `openai-compatible-*` | API key | Default | Dynamic: any OpenAI-compatible endpoint |
|
||||
| `anthropic-compatible-*` | API key | Default | Dynamic: any Claude-compatible endpoint |
|
||||
|
||||
---
|
||||
|
||||
## 8. Data Flow Summary
|
||||
|
||||
### Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor\nbuildUrl + buildHeaders"]
|
||||
D --> E["fetch(providerURL)"]
|
||||
E --> F["createSSEStream()\nTRANSLATE mode"]
|
||||
F --> G["parseSSELine()"]
|
||||
G --> H["translateResponse()\ntarget → OpenAI → source"]
|
||||
H --> I["extractUsage()\n+ addBuffer"]
|
||||
I --> J["formatSSE()"]
|
||||
J --> K["Client receives\ntranslated SSE"]
|
||||
K --> L["logUsage()\nsaveRequestUsage()"]
|
||||
```
|
||||
|
||||
### Non-Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor.execute()"]
|
||||
D --> E["translateResponse()\ntarget → OpenAI → source"]
|
||||
E --> F["Return JSON\nresponse"]
|
||||
```
|
||||
|
||||
### Bypass Flow (Claude CLI)
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Claude CLI request"] --> B{"Match bypass\npattern?"}
|
||||
B -->|"Title/Warmup/Count"| C["Generate fake\nOpenAI response"]
|
||||
B -->|"No match"| D["Normal flow"]
|
||||
C --> E["Translate to\nsource format"]
|
||||
E --> F["Return without\ncalling provider"]
|
||||
```
|
||||
@@ -0,0 +1,148 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/FEATURES.md) · 🇪🇸 [es](../es/FEATURES.md) · 🇫🇷 [fr](../fr/FEATURES.md) · 🇩🇪 [de](../de/FEATURES.md) · 🇮🇹 [it](../it/FEATURES.md) · 🇷🇺 [ru](../ru/FEATURES.md) · 🇨🇳 [zh-CN](../zh-CN/FEATURES.md) · 🇯🇵 [ja](../ja/FEATURES.md) · 🇰🇷 [ko](../ko/FEATURES.md) · 🇸🇦 [ar](../ar/FEATURES.md) · 🇮🇳 [in](../in/FEATURES.md) · 🇹🇭 [th](../th/FEATURES.md) · 🇻🇳 [vi](../vi/FEATURES.md) · 🇮🇩 [id](../id/FEATURES.md) · 🇲🇾 [ms](../ms/FEATURES.md) · 🇳🇱 [nl](../nl/FEATURES.md) · 🇵🇱 [pl](../pl/FEATURES.md) · 🇸🇪 [sv](../sv/FEATURES.md) · 🇳🇴 [no](../no/FEATURES.md) · 🇩🇰 [da](../da/FEATURES.md) · 🇫🇮 [fi](../fi/FEATURES.md) · 🇵🇹 [pt](../pt/FEATURES.md) · 🇷🇴 [ro](../ro/FEATURES.md) · 🇭🇺 [hu](../hu/FEATURES.md) · 🇧🇬 [bg](../bg/FEATURES.md) · 🇸🇰 [sk](../sk/FEATURES.md) · 🇺🇦 [uk-UA](../uk-UA/FEATURES.md) · 🇮🇱 [he](../he/FEATURES.md) · 🇵🇭 [phi](../phi/FEATURES.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Dashboard Features Gallery
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](FEATURES.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/FEATURES.md) | 🇪🇸 [Español](i18n/es/FEATURES.md) | 🇫🇷 [Français](i18n/fr/FEATURES.md) | 🇮🇹 [Italiano](i18n/it/FEATURES.md) | 🇷🇺 [Русский](i18n/ru/FEATURES.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/FEATURES.md) | 🇩🇪 [Deutsch](i18n/de/FEATURES.md) | 🇮🇳 [हिन्दी](i18n/in/FEATURES.md) | 🇹🇭 [ไทย](i18n/th/FEATURES.md) | 🇺🇦 [Українська](i18n/uk-UA/FEATURES.md) | 🇸🇦 [العربية](i18n/ar/FEATURES.md) | 🇯🇵 [日本語](i18n/ja/FEATURES.md) | 🇻🇳 [Tiếng Việt](i18n/vi/FEATURES.md) | 🇧🇬 [Български](i18n/bg/FEATURES.md) | 🇩🇰 [Dansk](i18n/da/FEATURES.md) | 🇫🇮 [Suomi](i18n/fi/FEATURES.md) | 🇮🇱 [עברית](i18n/he/FEATURES.md) | 🇭🇺 [Magyar](i18n/hu/FEATURES.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/FEATURES.md) | 🇰🇷 [한국어](i18n/ko/FEATURES.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/FEATURES.md) | 🇳🇱 [Nederlands](i18n/nl/FEATURES.md) | 🇳🇴 [Norsk](i18n/no/FEATURES.md) | 🇵🇹 [Português (Portugal)](i18n/pt/FEATURES.md) | 🇷🇴 [Română](i18n/ro/FEATURES.md) | 🇵🇱 [Polski](i18n/pl/FEATURES.md) | 🇸🇰 [Slovenčina](i18n/sk/FEATURES.md) | 🇸🇪 [Svenska](i18n/sv/FEATURES.md) | 🇵🇭 [Filipino](i18n/phi/FEATURES.md)
|
||||
|
||||
Visual guide to every section of the OmniRoute dashboard.
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Providers
|
||||
|
||||
Manage AI provider connections: OAuth providers (Claude Code, Codex, Gemini CLI), API key providers (Groq, DeepSeek, OpenRouter), and free providers (iFlow, Qwen, Kiro).
|
||||
|
||||
- **Ollama Cloud** — Cloud-hosted Ollama models at `api.ollama.com` (free "Light usage" tier); use `ollamacloud/<model>` prefix
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
Create model routing combos with 6 strategies: priority, weighted, round-robin, random, least-used, and cost-optimized. Each combo chains multiple models with automatic fallback and includes quick templates and readiness checks.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 📊 Analytics
|
||||
|
||||
Comprehensive usage analytics with token consumption, cost estimates, activity heatmaps, weekly distribution charts, and per-provider breakdowns.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🏥 System Health
|
||||
|
||||
Real-time monitoring: uptime, memory, version, latency percentiles (p50/p95/p99), cache statistics, and provider circuit breaker states.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 Translator Playground
|
||||
|
||||
Four modes for debugging API translations: **Playground** (format converter), **Chat Tester** (live requests), **Test Bench** (batch tests), and **Live Monitor** (real-time stream).
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎮 Model Playground _(v2.0.9+)_
|
||||
|
||||
Test any model directly from the dashboard. Select provider, model, and endpoint, write prompts with Monaco Editor, stream responses in real-time, abort mid-stream, and view timing metrics.
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Themes _(v2.0.5+)_
|
||||
|
||||
Customizable color themes for the entire dashboard. Choose from 7 preset colors (Coral, Blue, Red, Green, Violet, Orange, Cyan) or create a custom theme by picking any hex color. Supports light, dark, and system mode.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Settings
|
||||
|
||||
Comprehensive settings panel with tabs:
|
||||
|
||||
- **General** — System storage, backup management (export/import database)
|
||||
- **Appearance** — Theme selector (dark/light/system), color theme presets and custom colors, health log visibility
|
||||
- **Security** — API endpoint protection, custom provider blocking, IP filtering, session info
|
||||
- **Routing** — Model aliases, background task degradation
|
||||
- **Resilience** — Rate limit persistence, circuit breaker tuning
|
||||
- **Advanced** — Configuration overrides
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Tools
|
||||
|
||||
One-click configuration for AI coding tools: Claude Code, Codex CLI, Gemini CLI, OpenClaw, Kilo Code, Antigravity, Cline, Continue, Cursor, and Factory Droid. Features automated config apply/reset, connection profiles, and model mapping.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🤖 CLI Agents _(v2.0.11+)_
|
||||
|
||||
Dashboard for discovering and managing CLI agents. Shows a grid of 14 built-in agents (Codex, Claude, Goose, Gemini CLI, OpenClaw, Aider, OpenCode, Cline, Qwen Code, ForgeCode, Amazon Q, Open Interpreter, Cursor CLI, Warp) with:
|
||||
|
||||
- **Installation status** — Installed / Not Found with version detection
|
||||
- **Protocol badges** — stdio, HTTP, etc.
|
||||
- **Custom agents** — Register any CLI tool via form (name, binary, version command, spawn args)
|
||||
- **CLI Fingerprint Matching** — Per-provider toggle to match native CLI request signatures, reducing ban risk while preserving proxy IP
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Media _(v2.0.3+)_
|
||||
|
||||
Generate images, videos, and music from the dashboard. Supports OpenAI, xAI, Together, Hyperbolic, SD WebUI, ComfyUI, AnimateDiff, Stable Audio Open, and MusicGen.
|
||||
|
||||
---
|
||||
|
||||
## 📝 Request Logs
|
||||
|
||||
Real-time request logging with filtering by provider, model, account, and API key. Shows status codes, token usage, latency, and response details.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🌐 API Endpoint
|
||||
|
||||
Your unified API endpoint with capability breakdown: Chat Completions, Responses API, Embeddings, Image Generation, Reranking, Audio Transcription, Text-to-Speech, Moderations, and registered API keys. Cloud proxy support for remote access.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔑 API Key Management
|
||||
|
||||
Create, scope, and revoke API keys. Each key can be restricted to specific models/providers with full access or read-only permissions. Visual key management with usage tracking.
|
||||
|
||||
---
|
||||
|
||||
## 📋 Audit Log
|
||||
|
||||
Administrative action tracking with filtering by action type, actor, target, IP address, and timestamp. Full security event history.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application
|
||||
|
||||
Native Electron desktop app for Windows, macOS, and Linux. Run OmniRoute as a standalone application with system tray integration, offline support, auto-update, and one-click install.
|
||||
|
||||
Key features:
|
||||
|
||||
- Server readiness polling (no blank screen on cold start)
|
||||
- System tray with port management
|
||||
- Content Security Policy
|
||||
- Single-instance lock
|
||||
- Auto-update on restart
|
||||
- Platform-conditional UI (macOS traffic lights, Windows/Linux default titlebar)
|
||||
|
||||
📖 See [`electron/README.md`](../electron/README.md) for full documentation.
|
||||
@@ -0,0 +1,87 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/MCP-SERVER.md) · 🇪🇸 [es](../es/MCP-SERVER.md) · 🇫🇷 [fr](../fr/MCP-SERVER.md) · 🇩🇪 [de](../de/MCP-SERVER.md) · 🇮🇹 [it](../it/MCP-SERVER.md) · 🇷🇺 [ru](../ru/MCP-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/MCP-SERVER.md) · 🇯🇵 [ja](../ja/MCP-SERVER.md) · 🇰🇷 [ko](../ko/MCP-SERVER.md) · 🇸🇦 [ar](../ar/MCP-SERVER.md) · 🇮🇳 [in](../in/MCP-SERVER.md) · 🇹🇭 [th](../th/MCP-SERVER.md) · 🇻🇳 [vi](../vi/MCP-SERVER.md) · 🇮🇩 [id](../id/MCP-SERVER.md) · 🇲🇾 [ms](../ms/MCP-SERVER.md) · 🇳🇱 [nl](../nl/MCP-SERVER.md) · 🇵🇱 [pl](../pl/MCP-SERVER.md) · 🇸🇪 [sv](../sv/MCP-SERVER.md) · 🇳🇴 [no](../no/MCP-SERVER.md) · 🇩🇰 [da](../da/MCP-SERVER.md) · 🇫🇮 [fi](../fi/MCP-SERVER.md) · 🇵🇹 [pt](../pt/MCP-SERVER.md) · 🇷🇴 [ro](../ro/MCP-SERVER.md) · 🇭🇺 [hu](../hu/MCP-SERVER.md) · 🇧🇬 [bg](../bg/MCP-SERVER.md) · 🇸🇰 [sk](../sk/MCP-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/MCP-SERVER.md) · 🇮🇱 [he](../he/MCP-SERVER.md) · 🇵🇭 [phi](../phi/MCP-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute MCP Server Documentation
|
||||
|
||||
> Model Context Protocol server with 16 intelligent tools
|
||||
|
||||
## Installation
|
||||
|
||||
OmniRoute MCP is built-in. Start it with:
|
||||
|
||||
```bash
|
||||
omniroute --mcp
|
||||
```
|
||||
|
||||
Or via the open-sse transport:
|
||||
|
||||
```bash
|
||||
# HTTP streamable transport (port 20130)
|
||||
omniroute --dev # MCP auto-starts on /mcp endpoint
|
||||
```
|
||||
|
||||
## IDE Configuration
|
||||
|
||||
See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot, and Claude Desktop setup.
|
||||
|
||||
---
|
||||
|
||||
## Essential Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :------------------------------ | :--------------------------------------- |
|
||||
| `omniroute_get_health` | Gateway health, circuit breakers, uptime |
|
||||
| `omniroute_list_combos` | All configured combos with models |
|
||||
| `omniroute_get_combo_metrics` | Performance metrics for a specific combo |
|
||||
| `omniroute_switch_combo` | Switch active combo by ID/name |
|
||||
| `omniroute_check_quota` | Quota status per provider or all |
|
||||
| `omniroute_route_request` | Send a chat completion through OmniRoute |
|
||||
| `omniroute_cost_report` | Cost analytics for a time period |
|
||||
| `omniroute_list_models_catalog` | Full model catalog with capabilities |
|
||||
|
||||
## Advanced Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :--------------------------------- | :---------------------------------------------- |
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
| `omniroute_get_session_snapshot` | Full session state: costs, tokens, errors |
|
||||
|
||||
## Authentication
|
||||
|
||||
MCP tools are authenticated via API key scopes. Each tool requires specific scopes:
|
||||
|
||||
| Scope | Tools |
|
||||
| :------------- | :----------------------------------------------- |
|
||||
| `read:health` | get_health, get_provider_metrics |
|
||||
| `read:combos` | list_combos, get_combo_metrics |
|
||||
| `write:combos` | switch_combo |
|
||||
| `read:quota` | check_quota |
|
||||
| `write:route` | route_request, simulate_route, test_combo |
|
||||
| `read:usage` | cost_report, get_session_snapshot, explain_route |
|
||||
| `write:config` | set_budget_guard, set_resilience_profile |
|
||||
| `read:models` | list_models_catalog, best_combo_for_task |
|
||||
|
||||
## Audit Logging
|
||||
|
||||
Every tool call is logged to `mcp_tool_audit` with:
|
||||
|
||||
- Tool name, arguments, result
|
||||
- Duration (ms), success/failure
|
||||
- API key hash, timestamp
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------------ |
|
||||
| `open-sse/mcp-server/server.ts` | MCP server creation + 16 tool registrations |
|
||||
| `open-sse/mcp-server/transport.ts` | Stdio + HTTP transport |
|
||||
| `open-sse/mcp-server/auth.ts` | API key + scope validation |
|
||||
| `open-sse/mcp-server/audit.ts` | Tool call audit logging |
|
||||
| `open-sse/mcp-server/tools/advancedTools.ts` | 8 advanced tool handlers |
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,37 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/RELEASE_CHECKLIST.md) · 🇪🇸 [es](../es/RELEASE_CHECKLIST.md) · 🇫🇷 [fr](../fr/RELEASE_CHECKLIST.md) · 🇩🇪 [de](../de/RELEASE_CHECKLIST.md) · 🇮🇹 [it](../it/RELEASE_CHECKLIST.md) · 🇷🇺 [ru](../ru/RELEASE_CHECKLIST.md) · 🇨🇳 [zh-CN](../zh-CN/RELEASE_CHECKLIST.md) · 🇯🇵 [ja](../ja/RELEASE_CHECKLIST.md) · 🇰🇷 [ko](../ko/RELEASE_CHECKLIST.md) · 🇸🇦 [ar](../ar/RELEASE_CHECKLIST.md) · 🇮🇳 [in](../in/RELEASE_CHECKLIST.md) · 🇹🇭 [th](../th/RELEASE_CHECKLIST.md) · 🇻🇳 [vi](../vi/RELEASE_CHECKLIST.md) · 🇮🇩 [id](../id/RELEASE_CHECKLIST.md) · 🇲🇾 [ms](../ms/RELEASE_CHECKLIST.md) · 🇳🇱 [nl](../nl/RELEASE_CHECKLIST.md) · 🇵🇱 [pl](../pl/RELEASE_CHECKLIST.md) · 🇸🇪 [sv](../sv/RELEASE_CHECKLIST.md) · 🇳🇴 [no](../no/RELEASE_CHECKLIST.md) · 🇩🇰 [da](../da/RELEASE_CHECKLIST.md) · 🇫🇮 [fi](../fi/RELEASE_CHECKLIST.md) · 🇵🇹 [pt](../pt/RELEASE_CHECKLIST.md) · 🇷🇴 [ro](../ro/RELEASE_CHECKLIST.md) · 🇭🇺 [hu](../hu/RELEASE_CHECKLIST.md) · 🇧🇬 [bg](../bg/RELEASE_CHECKLIST.md) · 🇸🇰 [sk](../sk/RELEASE_CHECKLIST.md) · 🇺🇦 [uk-UA](../uk-UA/RELEASE_CHECKLIST.md) · 🇮🇱 [he](../he/RELEASE_CHECKLIST.md) · 🇵🇭 [phi](../phi/RELEASE_CHECKLIST.md)
|
||||
|
||||
---
|
||||
|
||||
# Release Checklist
|
||||
|
||||
Use this checklist before tagging or publishing a new OmniRoute release.
|
||||
|
||||
## Version and Changelog
|
||||
|
||||
1. Bump `package.json` version (`x.y.z`) in the release branch.
|
||||
2. Move release notes from `## [Unreleased]` in `CHANGELOG.md` to a dated section:
|
||||
- `## [x.y.z] — YYYY-MM-DD`
|
||||
3. Keep `## [Unreleased]` as the first changelog section for upcoming work.
|
||||
4. Ensure the latest semver section in `CHANGELOG.md` equals `package.json` version.
|
||||
|
||||
## API Docs
|
||||
|
||||
1. Update `docs/openapi.yaml`:
|
||||
- `info.version` must equal `package.json` version.
|
||||
2. Validate endpoint examples if API contracts changed.
|
||||
|
||||
## Runtime Docs
|
||||
|
||||
1. Review `docs/ARCHITECTURE.md` for storage/runtime drift.
|
||||
2. Review `docs/TROUBLESHOOTING.md` for env var and operational drift.
|
||||
3. Update localized docs if source docs changed significantly.
|
||||
|
||||
## Automated Check
|
||||
|
||||
Run the sync guard locally before opening PR:
|
||||
|
||||
```bash
|
||||
npm run check:docs-sync
|
||||
```
|
||||
|
||||
CI also runs this check in `.github/workflows/ci.yml` (lint job).
|
||||
@@ -0,0 +1,258 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/TROUBLESHOOTING.md) · 🇪🇸 [es](../es/TROUBLESHOOTING.md) · 🇫🇷 [fr](../fr/TROUBLESHOOTING.md) · 🇩🇪 [de](../de/TROUBLESHOOTING.md) · 🇮🇹 [it](../it/TROUBLESHOOTING.md) · 🇷🇺 [ru](../ru/TROUBLESHOOTING.md) · 🇨🇳 [zh-CN](../zh-CN/TROUBLESHOOTING.md) · 🇯🇵 [ja](../ja/TROUBLESHOOTING.md) · 🇰🇷 [ko](../ko/TROUBLESHOOTING.md) · 🇸🇦 [ar](../ar/TROUBLESHOOTING.md) · 🇮🇳 [in](../in/TROUBLESHOOTING.md) · 🇹🇭 [th](../th/TROUBLESHOOTING.md) · 🇻🇳 [vi](../vi/TROUBLESHOOTING.md) · 🇮🇩 [id](../id/TROUBLESHOOTING.md) · 🇲🇾 [ms](../ms/TROUBLESHOOTING.md) · 🇳🇱 [nl](../nl/TROUBLESHOOTING.md) · 🇵🇱 [pl](../pl/TROUBLESHOOTING.md) · 🇸🇪 [sv](../sv/TROUBLESHOOTING.md) · 🇳🇴 [no](../no/TROUBLESHOOTING.md) · 🇩🇰 [da](../da/TROUBLESHOOTING.md) · 🇫🇮 [fi](../fi/TROUBLESHOOTING.md) · 🇵🇹 [pt](../pt/TROUBLESHOOTING.md) · 🇷🇴 [ro](../ro/TROUBLESHOOTING.md) · 🇭🇺 [hu](../hu/TROUBLESHOOTING.md) · 🇧🇬 [bg](../bg/TROUBLESHOOTING.md) · 🇸🇰 [sk](../sk/TROUBLESHOOTING.md) · 🇺🇦 [uk-UA](../uk-UA/TROUBLESHOOTING.md) · 🇮🇱 [he](../he/TROUBLESHOOTING.md) · 🇵🇭 [phi](../phi/TROUBLESHOOTING.md)
|
||||
|
||||
---
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](TROUBLESHOOTING.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/TROUBLESHOOTING.md) | 🇪🇸 [Español](i18n/es/TROUBLESHOOTING.md) | 🇫🇷 [Français](i18n/fr/TROUBLESHOOTING.md) | 🇮🇹 [Italiano](i18n/it/TROUBLESHOOTING.md) | 🇷🇺 [Русский](i18n/ru/TROUBLESHOOTING.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/TROUBLESHOOTING.md) | 🇩🇪 [Deutsch](i18n/de/TROUBLESHOOTING.md) | 🇮🇳 [हिन्दी](i18n/in/TROUBLESHOOTING.md) | 🇹🇭 [ไทย](i18n/th/TROUBLESHOOTING.md) | 🇺🇦 [Українська](i18n/uk-UA/TROUBLESHOOTING.md) | 🇸🇦 [العربية](i18n/ar/TROUBLESHOOTING.md) | 🇯🇵 [日本語](i18n/ja/TROUBLESHOOTING.md) | 🇻🇳 [Tiếng Việt](i18n/vi/TROUBLESHOOTING.md) | 🇧🇬 [Български](i18n/bg/TROUBLESHOOTING.md) | 🇩🇰 [Dansk](i18n/da/TROUBLESHOOTING.md) | 🇫🇮 [Suomi](i18n/fi/TROUBLESHOOTING.md) | 🇮🇱 [עברית](i18n/he/TROUBLESHOOTING.md) | 🇭🇺 [Magyar](i18n/hu/TROUBLESHOOTING.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/TROUBLESHOOTING.md) | 🇰🇷 [한국어](i18n/ko/TROUBLESHOOTING.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/TROUBLESHOOTING.md) | 🇳🇱 [Nederlands](i18n/nl/TROUBLESHOOTING.md) | 🇳🇴 [Norsk](i18n/no/TROUBLESHOOTING.md) | 🇵🇹 [Português (Portugal)](i18n/pt/TROUBLESHOOTING.md) | 🇷🇴 [Română](i18n/ro/TROUBLESHOOTING.md) | 🇵🇱 [Polski](i18n/pl/TROUBLESHOOTING.md) | 🇸🇰 [Slovenčina](i18n/sk/TROUBLESHOOTING.md) | 🇸🇪 [Svenska](i18n/sv/TROUBLESHOOTING.md) | 🇵🇭 [Filipino](i18n/phi/TROUBLESHOOTING.md)
|
||||
|
||||
Common problems and solutions for OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Quick Fixes
|
||||
|
||||
| Problem | Solution |
|
||||
| ----------------------------- | ------------------------------------------------------------------ |
|
||||
| First login not working | Set `INITIAL_PASSWORD` in `.env` (no hardcoded default) |
|
||||
| Dashboard opens on wrong port | Set `PORT=20128` and `NEXT_PUBLIC_BASE_URL=http://localhost:20128` |
|
||||
| No request logs under `logs/` | Set `ENABLE_REQUEST_LOGS=true` |
|
||||
| EACCES: permission denied | Set `DATA_DIR=/path/to/writable/dir` to override `~/.omniroute` |
|
||||
| Routing strategy not saving | Update to v1.4.11+ (Zod schema fix for settings persistence) |
|
||||
|
||||
---
|
||||
|
||||
## Provider Issues
|
||||
|
||||
### "Language model did not provide messages"
|
||||
|
||||
**Cause:** Provider quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Check dashboard quota tracker
|
||||
2. Use a combo with fallback tiers
|
||||
3. Switch to cheaper/free tier
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
**Cause:** Subscription quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
- Add fallback: `cc/claude-opus-4-6 → glm/glm-4.7 → if/kimi-k2-thinking`
|
||||
- Use GLM/MiniMax as cheap backup
|
||||
|
||||
### OAuth Token Expired
|
||||
|
||||
OmniRoute auto-refreshes tokens. If issues persist:
|
||||
|
||||
1. Dashboard → Provider → Reconnect
|
||||
2. Delete and re-add the provider connection
|
||||
|
||||
---
|
||||
|
||||
## Cloud Issues
|
||||
|
||||
### Cloud Sync Errors
|
||||
|
||||
1. Verify `BASE_URL` points to your running instance (e.g., `http://localhost:20128`)
|
||||
2. Verify `CLOUD_URL` points to your cloud endpoint (e.g., `https://omniroute.dev`)
|
||||
3. Keep `NEXT_PUBLIC_*` values aligned with server-side values
|
||||
|
||||
### Cloud `stream=false` Returns 500
|
||||
|
||||
**Symptom:** `Unexpected token 'd'...` on cloud endpoint for non-streaming calls.
|
||||
|
||||
**Cause:** Upstream returns SSE payload while client expects JSON.
|
||||
|
||||
**Workaround:** Use `stream=true` for cloud direct calls. Local runtime includes SSE→JSON fallback.
|
||||
|
||||
### Cloud Says Connected but "Invalid API key"
|
||||
|
||||
1. Create a fresh key from local dashboard (`/api/keys`)
|
||||
2. Run cloud sync: Enable Cloud → Sync Now
|
||||
3. Old/non-synced keys can still return `401` on cloud
|
||||
|
||||
---
|
||||
|
||||
## Docker Issues
|
||||
|
||||
### CLI Tool Shows Not Installed
|
||||
|
||||
1. Check runtime fields: `curl http://localhost:20128/api/cli-tools/runtime/codex | jq`
|
||||
2. For portable mode: use image target `runner-cli` (bundled CLIs)
|
||||
3. For host mount mode: set `CLI_EXTRA_PATHS` and mount host bin directory as read-only
|
||||
4. If `installed=true` and `runnable=false`: binary was found but failed healthcheck
|
||||
|
||||
### Quick Runtime Validation
|
||||
|
||||
```bash
|
||||
curl -s http://localhost:20128/api/cli-tools/codex-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/claude-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/openclaw-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cost Issues
|
||||
|
||||
### High Costs
|
||||
|
||||
1. Check usage stats in Dashboard → Usage
|
||||
2. Switch primary model to GLM/MiniMax
|
||||
3. Use free tier (Gemini CLI, iFlow) for non-critical tasks
|
||||
4. Set cost budgets per API key: Dashboard → API Keys → Budget
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
### Enable Request Logs
|
||||
|
||||
Set `ENABLE_REQUEST_LOGS=true` in your `.env` file. Logs appear under `logs/` directory.
|
||||
|
||||
### Check Provider Health
|
||||
|
||||
```bash
|
||||
# Health dashboard
|
||||
http://localhost:20128/dashboard/health
|
||||
|
||||
# API health check
|
||||
curl http://localhost:20128/api/monitoring/health
|
||||
```
|
||||
|
||||
### Runtime Storage
|
||||
|
||||
- Main state: `${DATA_DIR}/storage.sqlite` (providers, combos, aliases, keys, settings)
|
||||
- Usage: SQLite tables in `storage.sqlite` (`usage_history`, `call_logs`, `proxy_logs`) + optional `${DATA_DIR}/log.txt` and `${DATA_DIR}/call_logs/`
|
||||
- Request logs: `<repo>/logs/...` (when `ENABLE_REQUEST_LOGS=true`)
|
||||
|
||||
---
|
||||
|
||||
## Circuit Breaker Issues
|
||||
|
||||
### Provider stuck in OPEN state
|
||||
|
||||
When a provider's circuit breaker is OPEN, requests are blocked until the cooldown expires.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Go to **Dashboard → Settings → Resilience**
|
||||
2. Check the circuit breaker card for the affected provider
|
||||
3. Click **Reset All** to clear all breakers, or wait for the cooldown to expire
|
||||
4. Verify the provider is actually available before resetting
|
||||
|
||||
### Provider keeps tripping the circuit breaker
|
||||
|
||||
If a provider repeatedly enters OPEN state:
|
||||
|
||||
1. Check **Dashboard → Health → Provider Health** for the failure pattern
|
||||
2. Go to **Settings → Resilience → Provider Profiles** and increase the failure threshold
|
||||
3. Check if the provider has changed API limits or requires re-authentication
|
||||
4. Review latency telemetry — high latency may cause timeout-based failures
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription Issues
|
||||
|
||||
### "Unsupported model" error
|
||||
|
||||
- Ensure you're using the correct prefix: `deepgram/nova-3` or `assemblyai/best`
|
||||
- Verify the provider is connected in **Dashboard → Providers**
|
||||
|
||||
### Transcription returns empty or fails
|
||||
|
||||
- Check supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`
|
||||
- Verify file size is within provider limits (typically < 25MB)
|
||||
- Check provider API key validity in the provider card
|
||||
|
||||
---
|
||||
|
||||
## Translator Debugging
|
||||
|
||||
Use **Dashboard → Translator** to debug format translation issues:
|
||||
|
||||
| Mode | When to Use |
|
||||
| ---------------- | -------------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Compare input/output formats side by side — paste a failing request to see how it translates |
|
||||
| **Chat Tester** | Send live messages and inspect the full request/response payload including headers |
|
||||
| **Test Bench** | Run batch tests across format combinations to find which translations are broken |
|
||||
| **Live Monitor** | Watch real-time request flow to catch intermittent translation issues |
|
||||
|
||||
### Common format issues
|
||||
|
||||
- **Thinking tags not appearing** — Check if the target provider supports thinking and the thinking budget setting
|
||||
- **Tool calls dropping** — Some format translations may strip unsupported fields; verify in Playground mode
|
||||
- **System prompt missing** — Claude and Gemini handle system prompts differently; check translation output
|
||||
- **SDK returns raw string instead of object** — Fixed in v1.1.0: response sanitizer now strips non-standard fields (`x_groq`, `usage_breakdown`, etc.) that cause OpenAI SDK Pydantic validation failures
|
||||
- **GLM/ERNIE rejects `system` role** — Fixed in v1.1.0: role normalizer automatically merges system messages into user messages for incompatible models
|
||||
- **`developer` role not recognized** — Fixed in v1.1.0: automatically converted to `system` for non-OpenAI providers
|
||||
- **`json_schema` not working with Gemini** — Fixed in v1.1.0: `response_format` is now converted to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
---
|
||||
|
||||
## Resilience Settings
|
||||
|
||||
### Auto rate-limit not triggering
|
||||
|
||||
- Auto rate-limit only applies to API key providers (not OAuth/subscription)
|
||||
- Verify **Settings → Resilience → Provider Profiles** has auto-rate-limit enabled
|
||||
- Check if the provider returns `429` status codes or `Retry-After` headers
|
||||
|
||||
### Tuning exponential backoff
|
||||
|
||||
Provider profiles support these settings:
|
||||
|
||||
- **Base delay** — Initial wait time after first failure (default: 1s)
|
||||
- **Max delay** — Maximum wait time cap (default: 30s)
|
||||
- **Multiplier** — How much to increase delay per consecutive failure (default: 2x)
|
||||
|
||||
### Anti-thundering herd
|
||||
|
||||
When many concurrent requests hit a rate-limited provider, OmniRoute uses mutex + auto rate-limiting to serialize requests and prevent cascading failures. This is automatic for API key providers.
|
||||
|
||||
---
|
||||
|
||||
## Optional RAG / LLM failure taxonomy (16 problems)
|
||||
|
||||
Some OmniRoute users place the gateway in front of RAG or agent stacks. In those setups it is common to see a strange pattern: OmniRoute looks healthy (providers up, routing profiles ok, no rate limit alerts) but the final answer is still wrong.
|
||||
|
||||
In practice these incidents usually come from the downstream RAG pipeline, not from the gateway itself.
|
||||
|
||||
If you want a shared vocabulary to describe those failures you can use the WFGY ProblemMap, an external MIT license text resource that defines sixteen recurring RAG / LLM failure patterns. At a high level it covers:
|
||||
|
||||
- retrieval drift and broken context boundaries
|
||||
- empty or stale indexes and vector stores
|
||||
- embedding versus semantic mismatch
|
||||
- prompt assembly and context window issues
|
||||
- logic collapse and overconfident answers
|
||||
- long chain and agent coordination failures
|
||||
- multi agent memory and role drift
|
||||
- deployment and bootstrap ordering problems
|
||||
|
||||
The idea is simple:
|
||||
|
||||
1. When you investigate a bad response, capture:
|
||||
- user task and request
|
||||
- route or provider combo in OmniRoute
|
||||
- any RAG context used downstream (retrieved documents, tool calls, etc)
|
||||
2. Map the incident to one or two WFGY ProblemMap numbers (`No.1` … `No.16`).
|
||||
3. Store the number in your own dashboard, runbook, or incident tracker next to the OmniRoute logs.
|
||||
4. Use the corresponding WFGY page to decide whether you need to change your RAG stack, retriever, or routing strategy.
|
||||
|
||||
Full text and concrete recipes live here (MIT license, text only):
|
||||
|
||||
[WFGY ProblemMap README](https://github.com/onestardao/WFGY/blob/main/ProblemMap/README.md)
|
||||
|
||||
You can ignore this section if you do not run RAG or agent pipelines behind OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Still Stuck?
|
||||
|
||||
- **GitHub Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **Architecture**: See [`docs/ARCHITECTURE.md`](ARCHITECTURE.md) for internal details
|
||||
- **API Reference**: See [`docs/API_REFERENCE.md`](API_REFERENCE.md) for all endpoints
|
||||
- **Health Dashboard**: Check **Dashboard → Health** for real-time system status
|
||||
- **Translator**: Use **Dashboard → Translator** to debug format issues
|
||||
@@ -0,0 +1,813 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/USER_GUIDE.md) · 🇪🇸 [es](../es/USER_GUIDE.md) · 🇫🇷 [fr](../fr/USER_GUIDE.md) · 🇩🇪 [de](../de/USER_GUIDE.md) · 🇮🇹 [it](../it/USER_GUIDE.md) · 🇷🇺 [ru](../ru/USER_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/USER_GUIDE.md) · 🇯🇵 [ja](../ja/USER_GUIDE.md) · 🇰🇷 [ko](../ko/USER_GUIDE.md) · 🇸🇦 [ar](../ar/USER_GUIDE.md) · 🇮🇳 [in](../in/USER_GUIDE.md) · 🇹🇭 [th](../th/USER_GUIDE.md) · 🇻🇳 [vi](../vi/USER_GUIDE.md) · 🇮🇩 [id](../id/USER_GUIDE.md) · 🇲🇾 [ms](../ms/USER_GUIDE.md) · 🇳🇱 [nl](../nl/USER_GUIDE.md) · 🇵🇱 [pl](../pl/USER_GUIDE.md) · 🇸🇪 [sv](../sv/USER_GUIDE.md) · 🇳🇴 [no](../no/USER_GUIDE.md) · 🇩🇰 [da](../da/USER_GUIDE.md) · 🇫🇮 [fi](../fi/USER_GUIDE.md) · 🇵🇹 [pt](../pt/USER_GUIDE.md) · 🇷🇴 [ro](../ro/USER_GUIDE.md) · 🇭🇺 [hu](../hu/USER_GUIDE.md) · 🇧🇬 [bg](../bg/USER_GUIDE.md) · 🇸🇰 [sk](../sk/USER_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/USER_GUIDE.md) · 🇮🇱 [he](../he/USER_GUIDE.md) · 🇵🇭 [phi](../phi/USER_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# User Guide
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](USER_GUIDE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/USER_GUIDE.md) | 🇪🇸 [Español](i18n/es/USER_GUIDE.md) | 🇫🇷 [Français](i18n/fr/USER_GUIDE.md) | 🇮🇹 [Italiano](i18n/it/USER_GUIDE.md) | 🇷🇺 [Русский](i18n/ru/USER_GUIDE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/USER_GUIDE.md) | 🇩🇪 [Deutsch](i18n/de/USER_GUIDE.md) | 🇮🇳 [हिन्दी](i18n/in/USER_GUIDE.md) | 🇹🇭 [ไทย](i18n/th/USER_GUIDE.md) | 🇺🇦 [Українська](i18n/uk-UA/USER_GUIDE.md) | 🇸🇦 [العربية](i18n/ar/USER_GUIDE.md) | 🇯🇵 [日本語](i18n/ja/USER_GUIDE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/USER_GUIDE.md) | 🇧🇬 [Български](i18n/bg/USER_GUIDE.md) | 🇩🇰 [Dansk](i18n/da/USER_GUIDE.md) | 🇫🇮 [Suomi](i18n/fi/USER_GUIDE.md) | 🇮🇱 [עברית](i18n/he/USER_GUIDE.md) | 🇭🇺 [Magyar](i18n/hu/USER_GUIDE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/USER_GUIDE.md) | 🇰🇷 [한국어](i18n/ko/USER_GUIDE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/USER_GUIDE.md) | 🇳🇱 [Nederlands](i18n/nl/USER_GUIDE.md) | 🇳🇴 [Norsk](i18n/no/USER_GUIDE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/USER_GUIDE.md) | 🇷🇴 [Română](i18n/ro/USER_GUIDE.md) | 🇵🇱 [Polski](i18n/pl/USER_GUIDE.md) | 🇸🇰 [Slovenčina](i18n/sk/USER_GUIDE.md) | 🇸🇪 [Svenska](i18n/sv/USER_GUIDE.md) | 🇵🇭 [Filipino](i18n/phi/USER_GUIDE.md)
|
||||
|
||||
Complete guide for configuring providers, creating combos, integrating CLI tools, and deploying OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Pricing at a Glance](#-pricing-at-a-glance)
|
||||
- [Use Cases](#-use-cases)
|
||||
- [Provider Setup](#-provider-setup)
|
||||
- [CLI Integration](#-cli-integration)
|
||||
- [Deployment](#-deployment)
|
||||
- [Available Models](#-available-models)
|
||||
- [Advanced Features](#-advanced-features)
|
||||
|
||||
---
|
||||
|
||||
## 💰 Pricing at a Glance
|
||||
|
||||
| Tier | Provider | Cost | Quota Reset | Best For |
|
||||
| ------------------- | ----------------- | ----------- | ---------------- | -------------------- |
|
||||
| **💳 SUBSCRIPTION** | Claude Code (Pro) | $20/mo | 5h + weekly | Already subscribed |
|
||||
| | Codex (Plus/Pro) | $20-200/mo | 5h + weekly | OpenAI users |
|
||||
| | Gemini CLI | **FREE** | 180K/mo + 1K/day | Everyone! |
|
||||
| | GitHub Copilot | $10-19/mo | Monthly | GitHub users |
|
||||
| **🔑 API KEY** | DeepSeek | Pay per use | None | Cheap reasoning |
|
||||
| | Groq | Pay per use | None | Ultra-fast inference |
|
||||
| | xAI (Grok) | Pay per use | None | Grok 4 reasoning |
|
||||
| | Mistral | Pay per use | None | EU-hosted models |
|
||||
| | Perplexity | Pay per use | None | Search-augmented |
|
||||
| | Together AI | Pay per use | None | Open-source models |
|
||||
| | Fireworks AI | Pay per use | None | Fast FLUX images |
|
||||
| | Cerebras | Pay per use | None | Wafer-scale speed |
|
||||
| | Cohere | Pay per use | None | Command R+ RAG |
|
||||
| | NVIDIA NIM | Pay per use | None | Enterprise models |
|
||||
| **💰 CHEAP** | GLM-4.7 | $0.6/1M | Daily 10AM | Budget backup |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5-hour rolling | Cheapest option |
|
||||
| | Kimi K2 | $9/mo flat | 10M tokens/mo | Predictable cost |
|
||||
| **🆓 FREE** | iFlow | $0 | Unlimited | 8 models free |
|
||||
| | Qwen | $0 | Unlimited | 3 models free |
|
||||
| | Kiro | $0 | Unlimited | Claude free |
|
||||
|
||||
**💡 Pro Tip:** Start with Gemini CLI (180K free/month) + iFlow (unlimited free) combo = $0 cost!
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Use Cases
|
||||
|
||||
### Case 1: "I have Claude Pro subscription"
|
||||
|
||||
**Problem:** Quota expires unused, rate limits during heavy coding
|
||||
|
||||
```
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (use subscription fully)
|
||||
2. glm/glm-4.7 (cheap backup when quota out)
|
||||
3. if/kimi-k2-thinking (free emergency fallback)
|
||||
|
||||
Monthly cost: $20 (subscription) + ~$5 (backup) = $25 total
|
||||
vs. $20 + hitting limits = frustration
|
||||
```
|
||||
|
||||
### Case 2: "I want zero cost"
|
||||
|
||||
**Problem:** Can't afford subscriptions, need reliable AI coding
|
||||
|
||||
```
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited free)
|
||||
3. qw/qwen3-coder-plus (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Quality: Production-ready models
|
||||
```
|
||||
|
||||
### Case 3: "I need 24/7 coding, no interruptions"
|
||||
|
||||
**Problem:** Deadlines, can't afford downtime
|
||||
|
||||
```
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6 (best quality)
|
||||
2. cx/gpt-5.2-codex (second subscription)
|
||||
3. glm/glm-4.7 (cheap, resets daily)
|
||||
4. minimax/MiniMax-M2.1 (cheapest, 5h reset)
|
||||
5. if/kimi-k2-thinking (free unlimited)
|
||||
|
||||
Result: 5 layers of fallback = zero downtime
|
||||
Monthly cost: $20-200 (subscriptions) + $10-20 (backup)
|
||||
```
|
||||
|
||||
### Case 4: "I want FREE AI in OpenClaw"
|
||||
|
||||
**Problem:** Need AI assistant in messaging apps, completely free
|
||||
|
||||
```
|
||||
Combo: "openclaw-free"
|
||||
1. if/glm-4.7 (unlimited free)
|
||||
2. if/minimax-m2.1 (unlimited free)
|
||||
3. if/kimi-k2-thinking (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Access via: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📖 Provider Setup
|
||||
|
||||
### 🔐 Subscription Providers
|
||||
|
||||
#### Claude Code (Pro/Max)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Claude Code
|
||||
→ OAuth login → Auto token refresh
|
||||
→ 5-hour + weekly quota tracking
|
||||
|
||||
Models:
|
||||
cc/claude-opus-4-6
|
||||
cc/claude-sonnet-4-5-20250929
|
||||
cc/claude-haiku-4-5-20251001
|
||||
```
|
||||
|
||||
**Pro Tip:** Use Opus for complex tasks, Sonnet for speed. OmniRoute tracks quota per model!
|
||||
|
||||
#### OpenAI Codex (Plus/Pro)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Codex
|
||||
→ OAuth login (port 1455)
|
||||
→ 5-hour + weekly reset
|
||||
|
||||
Models:
|
||||
cx/gpt-5.2-codex
|
||||
cx/gpt-5.1-codex-max
|
||||
```
|
||||
|
||||
#### Gemini CLI (FREE 180K/month!)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Gemini CLI
|
||||
→ Google OAuth
|
||||
→ 180K completions/month + 1K/day
|
||||
|
||||
Models:
|
||||
gc/gemini-3-flash-preview
|
||||
gc/gemini-2.5-pro
|
||||
```
|
||||
|
||||
**Best Value:** Huge free tier! Use this before paid tiers.
|
||||
|
||||
#### GitHub Copilot
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect GitHub
|
||||
→ OAuth via GitHub
|
||||
→ Monthly reset (1st of month)
|
||||
|
||||
Models:
|
||||
gh/gpt-5
|
||||
gh/claude-4.5-sonnet
|
||||
gh/gemini-3-pro
|
||||
```
|
||||
|
||||
### 💰 Cheap Providers
|
||||
|
||||
#### GLM-4.7 (Daily reset, $0.6/1M)
|
||||
|
||||
1. Sign up: [Zhipu AI](https://open.bigmodel.cn/)
|
||||
2. Get API key from Coding Plan
|
||||
3. Dashboard → Add API Key: Provider: `glm`, API Key: `your-key`
|
||||
|
||||
**Use:** `glm/glm-4.7` — **Pro Tip:** Coding Plan offers 3× quota at 1/7 cost! Reset daily 10:00 AM.
|
||||
|
||||
#### MiniMax M2.1 (5h reset, $0.20/1M)
|
||||
|
||||
1. Sign up: [MiniMax](https://www.minimax.io/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `minimax/MiniMax-M2.1` — **Pro Tip:** Cheapest option for long context (1M tokens)!
|
||||
|
||||
#### Kimi K2 ($9/month flat)
|
||||
|
||||
1. Subscribe: [Moonshot AI](https://platform.moonshot.ai/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `kimi/kimi-latest` — **Pro Tip:** Fixed $9/month for 10M tokens = $0.90/1M effective cost!
|
||||
|
||||
### 🆓 FREE Providers
|
||||
|
||||
#### iFlow (8 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect iFlow → OAuth login → Unlimited usage
|
||||
|
||||
Models: if/kimi-k2-thinking, if/qwen3-coder-plus, if/glm-4.7, if/minimax-m2, if/deepseek-r1
|
||||
```
|
||||
|
||||
#### Qwen (3 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Qwen → Device code auth → Unlimited usage
|
||||
|
||||
Models: qw/qwen3-coder-plus, qw/qwen3-coder-flash
|
||||
```
|
||||
|
||||
#### Kiro (Claude FREE)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Kiro → AWS Builder ID or Google/GitHub → Unlimited
|
||||
|
||||
Models: kr/claude-sonnet-4.5, kr/claude-haiku-4.5
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
### Example 1: Maximize Subscription → Cheap Backup
|
||||
|
||||
```
|
||||
Dashboard → Combos → Create New
|
||||
|
||||
Name: premium-coding
|
||||
Models:
|
||||
1. cc/claude-opus-4-6 (Subscription primary)
|
||||
2. glm/glm-4.7 (Cheap backup, $0.6/1M)
|
||||
3. minimax/MiniMax-M2.1 (Cheapest fallback, $0.20/1M)
|
||||
|
||||
Use in CLI: premium-coding
|
||||
```
|
||||
|
||||
### Example 2: Free-Only (Zero Cost)
|
||||
|
||||
```
|
||||
Name: free-combo
|
||||
Models:
|
||||
1. gc/gemini-3-flash-preview (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited)
|
||||
3. qw/qwen3-coder-plus (unlimited)
|
||||
|
||||
Cost: $0 forever!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Integration
|
||||
|
||||
### Cursor IDE
|
||||
|
||||
```
|
||||
Settings → Models → Advanced:
|
||||
OpenAI API Base URL: http://localhost:20128/v1
|
||||
OpenAI API Key: [from omniroute dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
Edit `~/.claude/config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"anthropic_api_base": "http://localhost:20128/v1",
|
||||
"anthropic_api_key": "your-omniroute-api-key"
|
||||
}
|
||||
```
|
||||
|
||||
### Codex CLI
|
||||
|
||||
```bash
|
||||
export OPENAI_BASE_URL="http://localhost:20128"
|
||||
export OPENAI_API_KEY="your-omniroute-api-key"
|
||||
codex "your prompt"
|
||||
```
|
||||
|
||||
### OpenClaw
|
||||
|
||||
Edit `~/.openclaw/openclaw.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": { "primary": "omniroute/if/glm-4.7" }
|
||||
}
|
||||
},
|
||||
"models": {
|
||||
"providers": {
|
||||
"omniroute": {
|
||||
"baseUrl": "http://localhost:20128/v1",
|
||||
"apiKey": "your-omniroute-api-key",
|
||||
"api": "openai-completions",
|
||||
"models": [{ "id": "if/glm-4.7", "name": "glm-4.7" }]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Or use Dashboard:** CLI Tools → OpenClaw → Auto-config
|
||||
|
||||
### Cline / Continue / RooCode
|
||||
|
||||
```
|
||||
Provider: OpenAI Compatible
|
||||
Base URL: http://localhost:20128/v1
|
||||
API Key: [from dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Deployment
|
||||
|
||||
### Global npm install (Recommended)
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
|
||||
# Create config directory
|
||||
mkdir -p ~/.omniroute
|
||||
|
||||
# Create .env file (see .env.example)
|
||||
cp .env.example ~/.omniroute/.env
|
||||
|
||||
# Start server
|
||||
omniroute
|
||||
# Or with custom port:
|
||||
omniroute --port 3000
|
||||
```
|
||||
|
||||
The CLI automatically loads `.env` from `~/.omniroute/.env` or `./.env`.
|
||||
|
||||
### VPS Deployment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/diegosouzapw/OmniRoute.git
|
||||
cd OmniRoute && npm install && npm run build
|
||||
|
||||
export JWT_SECRET="your-secure-secret-change-this"
|
||||
export INITIAL_PASSWORD="your-password"
|
||||
export DATA_DIR="/var/lib/omniroute"
|
||||
export PORT="20128"
|
||||
export HOSTNAME="0.0.0.0"
|
||||
export NODE_ENV="production"
|
||||
export NEXT_PUBLIC_BASE_URL="http://localhost:20128"
|
||||
export API_KEY_SECRET="endpoint-proxy-api-key-secret"
|
||||
|
||||
npm run start
|
||||
# Or: pm2 start npm --name omniroute -- start
|
||||
```
|
||||
|
||||
### PM2 Deployment (Low Memory)
|
||||
|
||||
For servers with limited RAM, use the memory limit option:
|
||||
|
||||
```bash
|
||||
# With 512MB limit (default)
|
||||
pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or with custom memory limit
|
||||
OMNIROUTE_MEMORY_MB=512 pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or using ecosystem.config.js
|
||||
pm2 start ecosystem.config.js
|
||||
```
|
||||
|
||||
Create `ecosystem.config.js`:
|
||||
|
||||
```javascript
|
||||
module.exports = {
|
||||
apps: [
|
||||
{
|
||||
name: "omniroute",
|
||||
script: "npm",
|
||||
args: "start",
|
||||
env: {
|
||||
NODE_ENV: "production",
|
||||
OMNIROUTE_MEMORY_MB: "512",
|
||||
JWT_SECRET: "your-secret",
|
||||
INITIAL_PASSWORD: "your-password",
|
||||
},
|
||||
node_args: "--max-old-space-size=512",
|
||||
max_memory_restart: "300M",
|
||||
},
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
# Build image (default = runner-cli with codex/claude/droid preinstalled)
|
||||
docker build -t omniroute:cli .
|
||||
|
||||
# Portable mode (recommended)
|
||||
docker run -d --name omniroute -p 20128:20128 --env-file ./.env -v omniroute-data:/app/data omniroute:cli
|
||||
```
|
||||
|
||||
For host-integrated mode with CLI binaries, see the Docker section in the main docs.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| ------------------------- | ------------------------------------ | ------------------------------------------------------- |
|
||||
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
|
||||
| `INITIAL_PASSWORD` | `123456` | First login password |
|
||||
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
|
||||
| `PORT` | framework default | Service port (`20128` in examples) |
|
||||
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
|
||||
| `NODE_ENV` | runtime default | Set `production` for deploy |
|
||||
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
|
||||
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
|
||||
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
|
||||
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
|
||||
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
|
||||
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit in MB |
|
||||
| `PROMPT_CACHE_MAX_SIZE` | `50` | Max prompt cache entries |
|
||||
| `SEMANTIC_CACHE_MAX_SIZE` | `100` | Max semantic cache entries |
|
||||
|
||||
For the full environment variable reference, see the [README](../README.md).
|
||||
|
||||
---
|
||||
|
||||
## 📊 Available Models
|
||||
|
||||
<details>
|
||||
<summary><b>View all available models</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** — Pro/Max: `cc/claude-opus-4-6`, `cc/claude-sonnet-4-5-20250929`, `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** — Plus/Pro: `cx/gpt-5.2-codex`, `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** — FREE: `gc/gemini-3-flash-preview`, `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)**: `gh/gpt-5`, `gh/claude-4.5-sonnet`
|
||||
|
||||
**GLM (`glm/`)** — $0.6/1M: `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** — $0.2/1M: `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** — FREE: `if/kimi-k2-thinking`, `if/qwen3-coder-plus`, `if/deepseek-r1`
|
||||
|
||||
**Qwen (`qw/`)** — FREE: `qw/qwen3-coder-plus`, `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** — FREE: `kr/claude-sonnet-4.5`, `kr/claude-haiku-4.5`
|
||||
|
||||
**DeepSeek (`ds/`)**: `ds/deepseek-chat`, `ds/deepseek-reasoner`
|
||||
|
||||
**Groq (`groq/`)**: `groq/llama-3.3-70b-versatile`, `groq/llama-4-maverick-17b-128e-instruct`
|
||||
|
||||
**xAI (`xai/`)**: `xai/grok-4`, `xai/grok-4-0709-fast-reasoning`, `xai/grok-code-mini`
|
||||
|
||||
**Mistral (`mistral/`)**: `mistral/mistral-large-2501`, `mistral/codestral-2501`
|
||||
|
||||
**Perplexity (`pplx/`)**: `pplx/sonar-pro`, `pplx/sonar`
|
||||
|
||||
**Together AI (`together/`)**: `together/meta-llama/Llama-3.3-70B-Instruct-Turbo`
|
||||
|
||||
**Fireworks AI (`fireworks/`)**: `fireworks/accounts/fireworks/models/deepseek-v3p1`
|
||||
|
||||
**Cerebras (`cerebras/`)**: `cerebras/llama-3.3-70b`
|
||||
|
||||
**Cohere (`cohere/`)**: `cohere/command-r-plus-08-2024`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)**: `nvidia/nvidia/llama-3.3-70b-instruct`
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Advanced Features
|
||||
|
||||
### Custom Models
|
||||
|
||||
Add any model ID to any provider without waiting for an app update:
|
||||
|
||||
```bash
|
||||
# Via API
|
||||
curl -X POST http://localhost:20128/api/provider-models \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"provider": "openai", "modelId": "gpt-4.5-preview", "modelName": "GPT-4.5 Preview"}'
|
||||
|
||||
# List: curl http://localhost:20128/api/provider-models?provider=openai
|
||||
# Remove: curl -X DELETE "http://localhost:20128/api/provider-models?provider=openai&model=gpt-4.5-preview"
|
||||
```
|
||||
|
||||
Or use Dashboard: **Providers → [Provider] → Custom Models**.
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
Route requests directly to a specific provider with model validation:
|
||||
|
||||
```bash
|
||||
POST http://localhost:20128/v1/providers/openai/chat/completions
|
||||
POST http://localhost:20128/v1/providers/openai/embeddings
|
||||
POST http://localhost:20128/v1/providers/fireworks/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
### Network Proxy Configuration
|
||||
|
||||
```bash
|
||||
# Set global proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"global": {"type":"http","host":"proxy.example.com","port":"8080"}}'
|
||||
|
||||
# Per-provider proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"providers": {"openai": {"type":"socks5","host":"proxy.example.com","port":"1080"}}}'
|
||||
|
||||
# Test proxy
|
||||
curl -X POST http://localhost:20128/api/settings/proxy/test \
|
||||
-d '{"proxy":{"type":"socks5","host":"proxy.example.com","port":"1080"}}'
|
||||
```
|
||||
|
||||
**Precedence:** Key-specific → Combo-specific → Provider-specific → Global → Environment.
|
||||
|
||||
### Model Catalog API
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/api/models/catalog
|
||||
```
|
||||
|
||||
Returns models grouped by provider with types (`chat`, `embedding`, `image`).
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
- Sync providers, combos, and settings across devices
|
||||
- Automatic background sync with timeout + fail-fast
|
||||
- Prefer server-side `BASE_URL`/`CLOUD_URL` in production
|
||||
|
||||
### LLM Gateway Intelligence (Phase 9)
|
||||
|
||||
- **Semantic Cache** — Auto-caches non-streaming, temperature=0 responses (bypass with `X-OmniRoute-No-Cache: true`)
|
||||
- **Request Idempotency** — Deduplicates requests within 5s via `Idempotency-Key` or `X-Request-Id` header
|
||||
- **Progress Tracking** — Opt-in SSE `event: progress` events via `X-OmniRoute-Progress: true` header
|
||||
|
||||
---
|
||||
|
||||
### Translator Playground
|
||||
|
||||
Access via **Dashboard → Translator**. Debug and visualize how OmniRoute translates API requests between providers.
|
||||
|
||||
| Mode | Purpose |
|
||||
| ---------------- | -------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Select source/target formats, paste a request, and see the translated output instantly |
|
||||
| **Chat Tester** | Send live chat messages through the proxy and inspect the full request/response cycle |
|
||||
| **Test Bench** | Run batch tests across multiple format combinations to verify translation correctness |
|
||||
| **Live Monitor** | Watch real-time translations as requests flow through the proxy |
|
||||
|
||||
**Use cases:**
|
||||
|
||||
- Debug why a specific client/provider combination fails
|
||||
- Verify that thinking tags, tool calls, and system prompts translate correctly
|
||||
- Compare format differences between OpenAI, Claude, Gemini, and Responses API formats
|
||||
|
||||
---
|
||||
|
||||
### Routing Strategies
|
||||
|
||||
Configure via **Dashboard → Settings → Routing**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------------------ | ------------------------------------------------------------------------------------------------ |
|
||||
| **Fill First** | Uses accounts in priority order — primary account handles all requests until unavailable |
|
||||
| **Round Robin** | Cycles through all accounts with a configurable sticky limit (default: 3 calls per account) |
|
||||
| **P2C (Power of Two Choices)** | Picks 2 random accounts and routes to the healthier one — balances load with awareness of health |
|
||||
| **Random** | Randomly selects an account for each request using Fisher-Yates shuffle |
|
||||
| **Least Used** | Routes to the account with the oldest `lastUsedAt` timestamp, distributing traffic evenly |
|
||||
| **Cost Optimized** | Routes to the account with the lowest priority value, optimizing for lowest-cost providers |
|
||||
|
||||
#### Wildcard Model Aliases
|
||||
|
||||
Create wildcard patterns to remap model names:
|
||||
|
||||
```
|
||||
Pattern: claude-sonnet-* → Target: cc/claude-sonnet-4-5-20250929
|
||||
Pattern: gpt-* → Target: gh/gpt-5.1-codex
|
||||
```
|
||||
|
||||
Wildcards support `*` (any characters) and `?` (single character).
|
||||
|
||||
#### Fallback Chains
|
||||
|
||||
Define global fallback chains that apply across all requests:
|
||||
|
||||
```
|
||||
Chain: production-fallback
|
||||
1. cc/claude-opus-4-6
|
||||
2. gh/gpt-5.1-codex
|
||||
3. glm/glm-4.7
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Resilience & Circuit Breakers
|
||||
|
||||
Configure via **Dashboard → Settings → Resilience**.
|
||||
|
||||
OmniRoute implements provider-level resilience with four components:
|
||||
|
||||
1. **Provider Profiles** — Per-provider configuration for:
|
||||
- Failure threshold (how many failures before opening)
|
||||
- Cooldown duration
|
||||
- Rate limit detection sensitivity
|
||||
- Exponential backoff parameters
|
||||
|
||||
2. **Editable Rate Limits** — System-level defaults configurable in the dashboard:
|
||||
- **Requests Per Minute (RPM)** — Maximum requests per minute per account
|
||||
- **Min Time Between Requests** — Minimum gap in milliseconds between requests
|
||||
- **Max Concurrent Requests** — Maximum simultaneous requests per account
|
||||
- Click **Edit** to modify, then **Save** or **Cancel**. Values persist via the resilience API.
|
||||
|
||||
3. **Circuit Breaker** — Tracks failures per provider and automatically opens the circuit when a threshold is reached:
|
||||
- **CLOSED** (Healthy) — Requests flow normally
|
||||
- **OPEN** — Provider is temporarily blocked after repeated failures
|
||||
- **HALF_OPEN** — Testing if provider has recovered
|
||||
|
||||
4. **Policies & Locked Identifiers** — Shows circuit breaker status and locked identifiers with force-unlock capability.
|
||||
|
||||
5. **Rate Limit Auto-Detection** — Monitors `429` and `Retry-After` headers to proactively avoid hitting provider rate limits.
|
||||
|
||||
**Pro Tip:** Use **Reset All** button to clear all circuit breakers and cooldowns when a provider recovers from an outage.
|
||||
|
||||
---
|
||||
|
||||
### Database Export / Import
|
||||
|
||||
Manage database backups in **Dashboard → Settings → System & Storage**.
|
||||
|
||||
| Action | Description |
|
||||
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| **Export Database** | Downloads the current SQLite database as a `.sqlite` file |
|
||||
| **Export All (.tar.gz)** | Downloads a full backup archive including: database, settings, combos, provider connections (no credentials), API key metadata |
|
||||
| **Import Database** | Upload a `.sqlite` file to replace the current database. A pre-import backup is automatically created |
|
||||
|
||||
```bash
|
||||
# API: Export database
|
||||
curl -o backup.sqlite http://localhost:20128/api/db-backups/export
|
||||
|
||||
# API: Export all (full archive)
|
||||
curl -o backup.tar.gz http://localhost:20128/api/db-backups/exportAll
|
||||
|
||||
# API: Import database
|
||||
curl -X POST http://localhost:20128/api/db-backups/import \
|
||||
-F "file=@backup.sqlite"
|
||||
```
|
||||
|
||||
**Import Validation:** The imported file is validated for integrity (SQLite pragma check), required tables (`provider_connections`, `provider_nodes`, `combos`, `api_keys`), and size (max 100MB).
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Migrate OmniRoute between machines
|
||||
- Create external backups for disaster recovery
|
||||
- Share configurations between team members (export all → share archive)
|
||||
|
||||
---
|
||||
|
||||
### Settings Dashboard
|
||||
|
||||
The settings page is organized into 5 tabs for easy navigation:
|
||||
|
||||
| Tab | Contents |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| **Security** | Login/Password settings, IP Access Control, API auth for `/models`, and Provider Blocking |
|
||||
| **Routing** | Global routing strategy (6 options), wildcard model aliases, fallback chains, combo defaults |
|
||||
| **Resilience** | Provider profiles, editable rate limits, circuit breaker status, policies & locked identifiers |
|
||||
| **AI** | Thinking budget configuration, global system prompt injection, prompt cache stats |
|
||||
| **Advanced** | Global proxy configuration (HTTP/SOCKS5) |
|
||||
|
||||
---
|
||||
|
||||
### Costs & Budget Management
|
||||
|
||||
Access via **Dashboard → Costs**.
|
||||
|
||||
| Tab | Purpose |
|
||||
| ----------- | ---------------------------------------------------------------------------------------- |
|
||||
| **Budget** | Set spending limits per API key with daily/weekly/monthly budgets and real-time tracking |
|
||||
| **Pricing** | View and edit model pricing entries — cost per 1K input/output tokens per provider |
|
||||
|
||||
```bash
|
||||
# API: Set a budget
|
||||
curl -X POST http://localhost:20128/api/usage/budget \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"keyId": "key-123", "limit": 50.00, "period": "monthly"}'
|
||||
|
||||
# API: Get current budget status
|
||||
curl http://localhost:20128/api/usage/budget
|
||||
```
|
||||
|
||||
**Cost Tracking:** Every request logs token usage and calculates cost using the pricing table. View breakdowns in **Dashboard → Usage** by provider, model, and API key.
|
||||
|
||||
---
|
||||
|
||||
### Audio Transcription
|
||||
|
||||
OmniRoute supports audio transcription via the OpenAI-compatible endpoint:
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
|
||||
# Example with curl
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@audio.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
Available providers: **Deepgram** (`deepgram/`), **AssemblyAI** (`assemblyai/`).
|
||||
|
||||
Supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
### Combo Balancing Strategies
|
||||
|
||||
Configure per-combo balancing in **Dashboard → Combos → Create/Edit → Strategy**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------ | ------------------------------------------------------------------------ |
|
||||
| **Round-Robin** | Rotates through models sequentially |
|
||||
| **Priority** | Always tries the first model; falls back only on error |
|
||||
| **Random** | Picks a random model from the combo for each request |
|
||||
| **Weighted** | Routes proportionally based on assigned weights per model |
|
||||
| **Least-Used** | Routes to the model with the fewest recent requests (uses combo metrics) |
|
||||
| **Cost-Optimized** | Routes to the cheapest available model (uses pricing table) |
|
||||
|
||||
Global combo defaults can be set in **Dashboard → Settings → Routing → Combo Defaults**.
|
||||
|
||||
---
|
||||
|
||||
### Health Dashboard
|
||||
|
||||
Access via **Dashboard → Health**. Real-time system health overview with 6 cards:
|
||||
|
||||
| Card | What It Shows |
|
||||
| --------------------- | ----------------------------------------------------------- |
|
||||
| **System Status** | Uptime, version, memory usage, data directory |
|
||||
| **Provider Health** | Per-provider circuit breaker state (Closed/Open/Half-Open) |
|
||||
| **Rate Limits** | Active rate limit cooldowns per account with remaining time |
|
||||
| **Active Lockouts** | Providers temporarily blocked by the lockout policy |
|
||||
| **Signature Cache** | Deduplication cache stats (active keys, hit rate) |
|
||||
| **Latency Telemetry** | p50/p95/p99 latency aggregation per provider |
|
||||
|
||||
**Pro Tip:** The Health page auto-refreshes every 10 seconds. Use the circuit breaker card to identify which providers are experiencing issues.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application (Electron)
|
||||
|
||||
OmniRoute is available as a native desktop application for Windows, macOS, and Linux.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# From the electron directory:
|
||||
cd electron
|
||||
npm install
|
||||
|
||||
# Development mode (connect to running Next.js dev server):
|
||||
npm run dev
|
||||
|
||||
# Production mode (uses standalone build):
|
||||
npm start
|
||||
```
|
||||
|
||||
### Building Installers
|
||||
|
||||
```bash
|
||||
cd electron
|
||||
npm run build # Current platform
|
||||
npm run build:win # Windows (.exe NSIS)
|
||||
npm run build:mac # macOS (.dmg universal)
|
||||
npm run build:linux # Linux (.AppImage)
|
||||
```
|
||||
|
||||
Output → `electron/dist-electron/`
|
||||
|
||||
### Key Features
|
||||
|
||||
| Feature | Description |
|
||||
| --------------------------- | ---------------------------------------------------- |
|
||||
| **Server Readiness** | Polls server before showing window (no blank screen) |
|
||||
| **System Tray** | Minimize to tray, change port, quit from tray menu |
|
||||
| **Port Management** | Change server port from tray (auto-restarts server) |
|
||||
| **Content Security Policy** | Restrictive CSP via session headers |
|
||||
| **Single Instance** | Only one app instance can run at a time |
|
||||
| **Offline Mode** | Bundled Next.js server works without internet |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| --------------------- | ------- | -------------------------------- |
|
||||
| `OMNIROUTE_PORT` | `20128` | Server port |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit (64–16384 MB) |
|
||||
|
||||
📖 Full documentation: [`electron/README.md`](../electron/README.md)
|
||||
@@ -0,0 +1,403 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/VM_DEPLOYMENT_GUIDE.md) · 🇪🇸 [es](../es/VM_DEPLOYMENT_GUIDE.md) · 🇫🇷 [fr](../fr/VM_DEPLOYMENT_GUIDE.md) · 🇩🇪 [de](../de/VM_DEPLOYMENT_GUIDE.md) · 🇮🇹 [it](../it/VM_DEPLOYMENT_GUIDE.md) · 🇷🇺 [ru](../ru/VM_DEPLOYMENT_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/VM_DEPLOYMENT_GUIDE.md) · 🇯🇵 [ja](../ja/VM_DEPLOYMENT_GUIDE.md) · 🇰🇷 [ko](../ko/VM_DEPLOYMENT_GUIDE.md) · 🇸🇦 [ar](../ar/VM_DEPLOYMENT_GUIDE.md) · 🇮🇳 [in](../in/VM_DEPLOYMENT_GUIDE.md) · 🇹🇭 [th](../th/VM_DEPLOYMENT_GUIDE.md) · 🇻🇳 [vi](../vi/VM_DEPLOYMENT_GUIDE.md) · 🇮🇩 [id](../id/VM_DEPLOYMENT_GUIDE.md) · 🇲🇾 [ms](../ms/VM_DEPLOYMENT_GUIDE.md) · 🇳🇱 [nl](../nl/VM_DEPLOYMENT_GUIDE.md) · 🇵🇱 [pl](../pl/VM_DEPLOYMENT_GUIDE.md) · 🇸🇪 [sv](../sv/VM_DEPLOYMENT_GUIDE.md) · 🇳🇴 [no](../no/VM_DEPLOYMENT_GUIDE.md) · 🇩🇰 [da](../da/VM_DEPLOYMENT_GUIDE.md) · 🇫🇮 [fi](../fi/VM_DEPLOYMENT_GUIDE.md) · 🇵🇹 [pt](../pt/VM_DEPLOYMENT_GUIDE.md) · 🇷🇴 [ro](../ro/VM_DEPLOYMENT_GUIDE.md) · 🇭🇺 [hu](../hu/VM_DEPLOYMENT_GUIDE.md) · 🇧🇬 [bg](../bg/VM_DEPLOYMENT_GUIDE.md) · 🇸🇰 [sk](../sk/VM_DEPLOYMENT_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/VM_DEPLOYMENT_GUIDE.md) · 🇮🇱 [he](../he/VM_DEPLOYMENT_GUIDE.md) · 🇵🇭 [phi](../phi/VM_DEPLOYMENT_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Guia de Deploy em VM com Cloudflare
|
||||
|
||||
Guia completo para instalar e configurar o OmniRoute em uma VM (VPS) com domínio gerenciado via Cloudflare.
|
||||
|
||||
---
|
||||
|
||||
## Pré-Requisitos
|
||||
|
||||
| Item | Mínimo | Recomendado |
|
||||
| ----------- | ------------------------ | ---------------- |
|
||||
| **CPU** | 1 vCPU | 2 vCPU |
|
||||
| **RAM** | 1 GB | 2 GB |
|
||||
| **Disco** | 10 GB SSD | 25 GB SSD |
|
||||
| **SO** | Ubuntu 22.04 LTS | Ubuntu 24.04 LTS |
|
||||
| **Domínio** | Registrado no Cloudflare | — |
|
||||
| **Docker** | Docker Engine 24+ | Docker 27+ |
|
||||
|
||||
**Providers testados**: Akamai (Linode), DigitalOcean, Vultr, Hetzner, AWS Lightsail.
|
||||
|
||||
---
|
||||
|
||||
## 1. Configurar a VM
|
||||
|
||||
### 1.1 Criar a instância
|
||||
|
||||
No seu provider de VPS preferido:
|
||||
|
||||
- Escolha Ubuntu 24.04 LTS
|
||||
- Selecione o plano mínimo (1 vCPU / 1 GB RAM)
|
||||
- Defina uma senha forte para root ou configure SSH key
|
||||
- Anote o **IP público** (ex: `203.0.113.10`)
|
||||
|
||||
### 1.2 Conectar via SSH
|
||||
|
||||
```bash
|
||||
ssh root@203.0.113.10
|
||||
```
|
||||
|
||||
### 1.3 Atualizar o sistema
|
||||
|
||||
```bash
|
||||
apt update && apt upgrade -y
|
||||
```
|
||||
|
||||
### 1.4 Instalar Docker
|
||||
|
||||
```bash
|
||||
# Instalar dependências
|
||||
apt install -y ca-certificates curl gnupg
|
||||
|
||||
# Adicionar repositório oficial do Docker
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
chmod a+r /etc/apt/keyrings/docker.gpg
|
||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
apt update
|
||||
apt install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
|
||||
```
|
||||
|
||||
### 1.5 Instalar nginx
|
||||
|
||||
```bash
|
||||
apt install -y nginx
|
||||
```
|
||||
|
||||
### 1.6 Configurar Firewall (UFW)
|
||||
|
||||
```bash
|
||||
ufw default deny incoming
|
||||
ufw default allow outgoing
|
||||
ufw allow 22/tcp # SSH
|
||||
ufw allow 80/tcp # HTTP (redirect)
|
||||
ufw allow 443/tcp # HTTPS
|
||||
ufw enable
|
||||
```
|
||||
|
||||
> **Dica**: Para segurança máxima, restrinja as portas 80 e 443 apenas para IPs da Cloudflare. Veja a seção [Segurança Avançada](#segurança-avançada).
|
||||
|
||||
---
|
||||
|
||||
## 2. Instalar o OmniRoute
|
||||
|
||||
### 2.1 Criar diretório de configuração
|
||||
|
||||
```bash
|
||||
mkdir -p /opt/omniroute
|
||||
```
|
||||
|
||||
### 2.2 Criar arquivo de variáveis de ambiente
|
||||
|
||||
```bash
|
||||
cat > /opt/omniroute/.env << 'EOF'
|
||||
# === Segurança ===
|
||||
JWT_SECRET=ALTERE-PARA-CHAVE-SECRETA-UNICA-64-CHARS
|
||||
INITIAL_PASSWORD=SuaSenhaSegura123!
|
||||
API_KEY_SECRET=ALTERE-PARA-OUTRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY=ALTERE-PARA-TERCEIRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY_VERSION=v1
|
||||
MACHINE_ID_SALT=ALTERE-PARA-SALT-UNICO
|
||||
|
||||
# === App ===
|
||||
PORT=20128
|
||||
NODE_ENV=production
|
||||
HOSTNAME=0.0.0.0
|
||||
DATA_DIR=/app/data
|
||||
STORAGE_DRIVER=sqlite
|
||||
ENABLE_REQUEST_LOGS=true
|
||||
AUTH_COOKIE_SECURE=false
|
||||
REQUIRE_API_KEY=false
|
||||
|
||||
# === Domain (altere para seu domínio) ===
|
||||
BASE_URL=https://llms.seudominio.com
|
||||
NEXT_PUBLIC_BASE_URL=https://llms.seudominio.com
|
||||
|
||||
# === Cloud Sync (opcional) ===
|
||||
# CLOUD_URL=https://cloud.omniroute.online
|
||||
# NEXT_PUBLIC_CLOUD_URL=https://cloud.omniroute.online
|
||||
EOF
|
||||
```
|
||||
|
||||
> ⚠️ **IMPORTANTE**: Gere chaves secretas únicas! Use `openssl rand -hex 32` para cada chave.
|
||||
|
||||
### 2.3 Iniciar o container
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### 2.4 Verificar se está rodando
|
||||
|
||||
```bash
|
||||
docker ps | grep omniroute
|
||||
docker logs omniroute --tail 20
|
||||
```
|
||||
|
||||
Deve exibir: `[DB] SQLite database ready` e `listening on port 20128`.
|
||||
|
||||
---
|
||||
|
||||
## 3. Configurar nginx (Reverse Proxy)
|
||||
|
||||
### 3.1 Gerar certificado SSL (Cloudflare Origin)
|
||||
|
||||
No painel da Cloudflare:
|
||||
|
||||
1. Vá em **SSL/TLS → Origin Server**
|
||||
2. Clique **Create Certificate**
|
||||
3. Deixe os padrões (15 anos, \*.seudominio.com)
|
||||
4. Copie o **Origin Certificate** e a **Private Key**
|
||||
|
||||
```bash
|
||||
mkdir -p /etc/nginx/ssl
|
||||
|
||||
# Colar o certificado
|
||||
nano /etc/nginx/ssl/origin.crt
|
||||
|
||||
# Colar a chave privada
|
||||
nano /etc/nginx/ssl/origin.key
|
||||
|
||||
chmod 600 /etc/nginx/ssl/origin.key
|
||||
```
|
||||
|
||||
### 3.2 Configuração do nginx
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/sites-available/omniroute << 'NGINX'
|
||||
# Default server — bloqueia acesso direto por IP
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen [::]:80 default_server;
|
||||
listen 443 ssl default_server;
|
||||
listen [::]:443 ssl default_server;
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
server_name _;
|
||||
return 444;
|
||||
}
|
||||
|
||||
# OmniRoute — HTTPS
|
||||
server {
|
||||
listen 443 ssl;
|
||||
listen [::]:443 ssl;
|
||||
server_name llms.seudominio.com; # Altere para seu domínio
|
||||
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
|
||||
client_max_body_size 100M;
|
||||
|
||||
location / {
|
||||
proxy_pass http://127.0.0.1:20128;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
# SSE (Server-Sent Events) — streaming AI responses
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_read_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
}
|
||||
}
|
||||
|
||||
# HTTP → HTTPS redirect
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
server_name llms.seudominio.com;
|
||||
return 301 https://$server_name$request_uri;
|
||||
}
|
||||
NGINX
|
||||
```
|
||||
|
||||
### 3.3 Ativar e testar
|
||||
|
||||
```bash
|
||||
# Remover config padrão
|
||||
rm -f /etc/nginx/sites-enabled/default
|
||||
|
||||
# Ativar OmniRoute
|
||||
ln -sf /etc/nginx/sites-available/omniroute /etc/nginx/sites-enabled/omniroute
|
||||
|
||||
# Testar e recarregar
|
||||
nginx -t && systemctl reload nginx
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Configurar Cloudflare DNS
|
||||
|
||||
### 4.1 Adicionar registro DNS
|
||||
|
||||
No painel da Cloudflare → DNS:
|
||||
|
||||
| Type | Name | Content | Proxy |
|
||||
| ---- | ------ | ------------------------- | ---------- |
|
||||
| A | `llms` | `203.0.113.10` (IP da VM) | ✅ Proxied |
|
||||
|
||||
### 4.2 Configurar SSL
|
||||
|
||||
Em **SSL/TLS → Overview**:
|
||||
|
||||
- Modo: **Full (Strict)**
|
||||
|
||||
Em **SSL/TLS → Edge Certificates**:
|
||||
|
||||
- Always Use HTTPS: ✅ On
|
||||
- Minimum TLS Version: TLS 1.2
|
||||
- Automatic HTTPS Rewrites: ✅ On
|
||||
|
||||
### 4.3 Testar
|
||||
|
||||
```bash
|
||||
curl -sI https://llms.seudominio.com/health
|
||||
# Deve retornar HTTP/2 200
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Operações e Manutenção
|
||||
|
||||
### Atualizar para nova versão
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
docker stop omniroute && docker rm omniroute
|
||||
docker run -d --name omniroute --restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### Ver logs
|
||||
|
||||
```bash
|
||||
docker logs -f omniroute # Stream em tempo real
|
||||
docker logs omniroute --tail 50 # Últimas 50 linhas
|
||||
```
|
||||
|
||||
### Backup manual do banco
|
||||
|
||||
```bash
|
||||
# Copiar dados do volume para o host
|
||||
docker cp omniroute:/app/data ./backup-$(date +%F)
|
||||
|
||||
# Ou comprimir todo o volume
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine tar czf /backup/omniroute-data-$(date +%F).tar.gz /data
|
||||
```
|
||||
|
||||
### Restaurar de backup
|
||||
|
||||
```bash
|
||||
docker stop omniroute
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine sh -c "rm -rf /data/* && tar xzf /backup/omniroute-data-YYYY-MM-DD.tar.gz -C /"
|
||||
docker start omniroute
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Segurança Avançada
|
||||
|
||||
### Restringir nginx para Cloudflare IPs
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/cloudflare-ips.conf << 'CF'
|
||||
# Cloudflare IPv4 ranges — atualizar periodicamente
|
||||
# https://www.cloudflare.com/ips-v4/
|
||||
set_real_ip_from 173.245.48.0/20;
|
||||
set_real_ip_from 103.21.244.0/22;
|
||||
set_real_ip_from 103.22.200.0/22;
|
||||
set_real_ip_from 103.31.4.0/22;
|
||||
set_real_ip_from 141.101.64.0/18;
|
||||
set_real_ip_from 108.162.192.0/18;
|
||||
set_real_ip_from 190.93.240.0/20;
|
||||
set_real_ip_from 188.114.96.0/20;
|
||||
set_real_ip_from 197.234.240.0/22;
|
||||
set_real_ip_from 198.41.128.0/17;
|
||||
set_real_ip_from 162.158.0.0/15;
|
||||
set_real_ip_from 104.16.0.0/13;
|
||||
set_real_ip_from 104.24.0.0/14;
|
||||
set_real_ip_from 172.64.0.0/13;
|
||||
set_real_ip_from 131.0.72.0/22;
|
||||
real_ip_header CF-Connecting-IP;
|
||||
CF
|
||||
```
|
||||
|
||||
Adicionar no `nginx.conf` dentro do bloco `http {}`:
|
||||
|
||||
```nginx
|
||||
include /etc/nginx/cloudflare-ips.conf;
|
||||
```
|
||||
|
||||
### Install fail2ban
|
||||
|
||||
```bash
|
||||
apt install -y fail2ban
|
||||
systemctl enable fail2ban
|
||||
systemctl start fail2ban
|
||||
|
||||
# Verificar status
|
||||
fail2ban-client status sshd
|
||||
```
|
||||
|
||||
### Bloquear acesso direto na porta do Docker
|
||||
|
||||
```bash
|
||||
# Impedir acesso externo direto à porta 20128
|
||||
iptables -I DOCKER-USER -p tcp --dport 20128 -j DROP
|
||||
iptables -I DOCKER-USER -i lo -p tcp --dport 20128 -j ACCEPT
|
||||
|
||||
# Persistir as regras
|
||||
apt install -y iptables-persistent
|
||||
netfilter-persistent save
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Deploy do Cloud Worker (Opcional)
|
||||
|
||||
Para acesso remoto via Cloudflare Workers (sem expor a VM diretamente):
|
||||
|
||||
```bash
|
||||
# No repositório local
|
||||
cd omnirouteCloud
|
||||
npm install
|
||||
npx wrangler login
|
||||
npx wrangler deploy
|
||||
```
|
||||
|
||||
Ver documentação completa em [omnirouteCloud/README.md](../omnirouteCloud/README.md).
|
||||
|
||||
---
|
||||
|
||||
## Resumo de Portas
|
||||
|
||||
| Porta | Serviço | Acesso |
|
||||
| ----- | ----------- | ----------------------------- |
|
||||
| 22 | SSH | Público (com fail2ban) |
|
||||
| 80 | nginx HTTP | Redirect → HTTPS |
|
||||
| 443 | nginx HTTPS | Via Cloudflare Proxy |
|
||||
| 20128 | OmniRoute | Somente localhost (via nginx) |
|
||||
@@ -0,0 +1,200 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/A2A-SERVER.md) · 🇪🇸 [es](../es/A2A-SERVER.md) · 🇫🇷 [fr](../fr/A2A-SERVER.md) · 🇩🇪 [de](../de/A2A-SERVER.md) · 🇮🇹 [it](../it/A2A-SERVER.md) · 🇷🇺 [ru](../ru/A2A-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/A2A-SERVER.md) · 🇯🇵 [ja](../ja/A2A-SERVER.md) · 🇰🇷 [ko](../ko/A2A-SERVER.md) · 🇸🇦 [ar](../ar/A2A-SERVER.md) · 🇮🇳 [in](../in/A2A-SERVER.md) · 🇹🇭 [th](../th/A2A-SERVER.md) · 🇻🇳 [vi](../vi/A2A-SERVER.md) · 🇮🇩 [id](../id/A2A-SERVER.md) · 🇲🇾 [ms](../ms/A2A-SERVER.md) · 🇳🇱 [nl](../nl/A2A-SERVER.md) · 🇵🇱 [pl](../pl/A2A-SERVER.md) · 🇸🇪 [sv](../sv/A2A-SERVER.md) · 🇳🇴 [no](../no/A2A-SERVER.md) · 🇩🇰 [da](../da/A2A-SERVER.md) · 🇫🇮 [fi](../fi/A2A-SERVER.md) · 🇵🇹 [pt](../pt/A2A-SERVER.md) · 🇷🇴 [ro](../ro/A2A-SERVER.md) · 🇭🇺 [hu](../hu/A2A-SERVER.md) · 🇧🇬 [bg](../bg/A2A-SERVER.md) · 🇸🇰 [sk](../sk/A2A-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/A2A-SERVER.md) · 🇮🇱 [he](../he/A2A-SERVER.md) · 🇵🇭 [phi](../phi/A2A-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute A2A Server Documentation
|
||||
|
||||
> Agent-to-Agent Protocol v0.3 — OmniRoute as an intelligent routing agent
|
||||
|
||||
## Agent Discovery
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/.well-known/agent.json
|
||||
```
|
||||
|
||||
Returns the Agent Card describing OmniRoute's capabilities, skills, and authentication requirements.
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
All `/a2a` requests require an API key via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer YOUR_OMNIROUTE_API_KEY
|
||||
```
|
||||
|
||||
If no API key is configured on the server, authentication is bypassed.
|
||||
|
||||
---
|
||||
|
||||
## JSON-RPC 2.0 Methods
|
||||
|
||||
### `message/send` — Synchronous Execution
|
||||
|
||||
Sends a message to a skill and waits for the complete response.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Write a hello world in Python"}],
|
||||
"metadata": {"model": "auto", "combo": "fast-coding"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"result": {
|
||||
"task": { "id": "uuid", "state": "completed" },
|
||||
"artifacts": [{ "type": "text", "content": "..." }],
|
||||
"metadata": {
|
||||
"routing_explanation": "Selected claude-sonnet via provider \"anthropic\" (latency: 1200ms, cost: $0.003)",
|
||||
"cost_envelope": { "estimated": 0.005, "actual": 0.003, "currency": "USD" },
|
||||
"resilience_trace": [
|
||||
{ "event": "primary_selected", "provider": "anthropic", "timestamp": "..." }
|
||||
],
|
||||
"policy_verdict": { "allowed": true, "reason": "within budget and quota limits" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `message/stream` — SSE Streaming
|
||||
|
||||
Same as `message/send` but returns Server-Sent Events for real-time streaming.
|
||||
|
||||
```bash
|
||||
curl -N -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/stream",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Explain quantum computing"}]
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**SSE Events:**
|
||||
|
||||
```
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"working"},"chunk":{"type":"text","content":"..."}}}
|
||||
|
||||
: heartbeat 2026-03-03T17:00:00Z
|
||||
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"completed"},"metadata":{...}}}
|
||||
```
|
||||
|
||||
### `tasks/get` — Query Task Status
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"tasks/get","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
### `tasks/cancel` — Cancel a Task
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"3","method":"tasks/cancel","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Skills
|
||||
|
||||
| Skill | Description |
|
||||
| :----------------- | :------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `smart-routing` | Routes prompts through OmniRoute's intelligent pipeline. Returns response with routing explanation, cost, and resilience trace. |
|
||||
| `quota-management` | Answers natural-language queries about provider quotas, suggests free combos, and provides quota rankings. |
|
||||
|
||||
---
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
```
|
||||
submitted → working → completed
|
||||
→ failed
|
||||
→ cancelled
|
||||
```
|
||||
|
||||
- Tasks expire after 5 minutes (configurable)
|
||||
- Terminal states: `completed`, `failed`, `cancelled`
|
||||
- Event log tracks every state transition
|
||||
|
||||
---
|
||||
|
||||
## Error Codes
|
||||
|
||||
| Code | Meaning |
|
||||
| :----- | :----------------------------- |
|
||||
| -32700 | Parse error (invalid JSON) |
|
||||
| -32600 | Invalid request / Unauthorized |
|
||||
| -32601 | Method or skill not found |
|
||||
| -32602 | Invalid params |
|
||||
| -32603 | Internal error |
|
||||
|
||||
---
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### Python (requests)
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
resp = requests.post("http://localhost:20128/a2a", json={
|
||||
"jsonrpc": "2.0", "id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}
|
||||
}, headers={"Authorization": "Bearer YOUR_KEY"})
|
||||
|
||||
result = resp.json()["result"]
|
||||
print(result["artifacts"][0]["content"])
|
||||
print(result["metadata"]["routing_explanation"])
|
||||
```
|
||||
|
||||
### TypeScript (fetch)
|
||||
|
||||
```typescript
|
||||
const resp = await fetch("http://localhost:20128/a2a", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: "Bearer YOUR_KEY",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: "1",
|
||||
method: "message/send",
|
||||
params: {
|
||||
skill: "smart-routing",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { result } = await resp.json();
|
||||
console.log(result.metadata.routing_explanation);
|
||||
```
|
||||
@@ -0,0 +1,455 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/API_REFERENCE.md) · 🇪🇸 [es](../es/API_REFERENCE.md) · 🇫🇷 [fr](../fr/API_REFERENCE.md) · 🇩🇪 [de](../de/API_REFERENCE.md) · 🇮🇹 [it](../it/API_REFERENCE.md) · 🇷🇺 [ru](../ru/API_REFERENCE.md) · 🇨🇳 [zh-CN](../zh-CN/API_REFERENCE.md) · 🇯🇵 [ja](../ja/API_REFERENCE.md) · 🇰🇷 [ko](../ko/API_REFERENCE.md) · 🇸🇦 [ar](../ar/API_REFERENCE.md) · 🇮🇳 [in](../in/API_REFERENCE.md) · 🇹🇭 [th](../th/API_REFERENCE.md) · 🇻🇳 [vi](../vi/API_REFERENCE.md) · 🇮🇩 [id](../id/API_REFERENCE.md) · 🇲🇾 [ms](../ms/API_REFERENCE.md) · 🇳🇱 [nl](../nl/API_REFERENCE.md) · 🇵🇱 [pl](../pl/API_REFERENCE.md) · 🇸🇪 [sv](../sv/API_REFERENCE.md) · 🇳🇴 [no](../no/API_REFERENCE.md) · 🇩🇰 [da](../da/API_REFERENCE.md) · 🇫🇮 [fi](../fi/API_REFERENCE.md) · 🇵🇹 [pt](../pt/API_REFERENCE.md) · 🇷🇴 [ro](../ro/API_REFERENCE.md) · 🇭🇺 [hu](../hu/API_REFERENCE.md) · 🇧🇬 [bg](../bg/API_REFERENCE.md) · 🇸🇰 [sk](../sk/API_REFERENCE.md) · 🇺🇦 [uk-UA](../uk-UA/API_REFERENCE.md) · 🇮🇱 [he](../he/API_REFERENCE.md) · 🇵🇭 [phi](../phi/API_REFERENCE.md)
|
||||
|
||||
---
|
||||
|
||||
# API Reference
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](API_REFERENCE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/API_REFERENCE.md) | 🇪🇸 [Español](i18n/es/API_REFERENCE.md) | 🇫🇷 [Français](i18n/fr/API_REFERENCE.md) | 🇮🇹 [Italiano](i18n/it/API_REFERENCE.md) | 🇷🇺 [Русский](i18n/ru/API_REFERENCE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/API_REFERENCE.md) | 🇩🇪 [Deutsch](i18n/de/API_REFERENCE.md) | 🇮🇳 [हिन्दी](i18n/in/API_REFERENCE.md) | 🇹🇭 [ไทย](i18n/th/API_REFERENCE.md) | 🇺🇦 [Українська](i18n/uk-UA/API_REFERENCE.md) | 🇸🇦 [العربية](i18n/ar/API_REFERENCE.md) | 🇯🇵 [日本語](i18n/ja/API_REFERENCE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/API_REFERENCE.md) | 🇧🇬 [Български](i18n/bg/API_REFERENCE.md) | 🇩🇰 [Dansk](i18n/da/API_REFERENCE.md) | 🇫🇮 [Suomi](i18n/fi/API_REFERENCE.md) | 🇮🇱 [עברית](i18n/he/API_REFERENCE.md) | 🇭🇺 [Magyar](i18n/hu/API_REFERENCE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/API_REFERENCE.md) | 🇰🇷 [한국어](i18n/ko/API_REFERENCE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/API_REFERENCE.md) | 🇳🇱 [Nederlands](i18n/nl/API_REFERENCE.md) | 🇳🇴 [Norsk](i18n/no/API_REFERENCE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/API_REFERENCE.md) | 🇷🇴 [Română](i18n/ro/API_REFERENCE.md) | 🇵🇱 [Polski](i18n/pl/API_REFERENCE.md) | 🇸🇰 [Slovenčina](i18n/sk/API_REFERENCE.md) | 🇸🇪 [Svenska](i18n/sv/API_REFERENCE.md) | 🇵🇭 [Filipino](i18n/phi/API_REFERENCE.md)
|
||||
|
||||
Complete reference for all OmniRoute API endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Chat Completions](#chat-completions)
|
||||
- [Embeddings](#embeddings)
|
||||
- [Image Generation](#image-generation)
|
||||
- [List Models](#list-models)
|
||||
- [Compatibility Endpoints](#compatibility-endpoints)
|
||||
- [Semantic Cache](#semantic-cache)
|
||||
- [Dashboard & Management](#dashboard--management)
|
||||
- [Request Processing](#request-processing)
|
||||
- [Authentication](#authentication)
|
||||
|
||||
---
|
||||
|
||||
## Chat Completions
|
||||
|
||||
```bash
|
||||
POST /v1/chat/completions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "cc/claude-opus-4-6",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Write a function to..."}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
```
|
||||
|
||||
### Custom Headers
|
||||
|
||||
| Header | Direction | Description |
|
||||
| ------------------------ | --------- | --------------------------------- |
|
||||
| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache |
|
||||
| `X-OmniRoute-Progress` | Request | Set to `true` for progress events |
|
||||
| `Idempotency-Key` | Request | Dedup key (5s window) |
|
||||
| `X-Request-Id` | Request | Alternative dedup key |
|
||||
| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) |
|
||||
| `X-OmniRoute-Idempotent` | Response | `true` if deduplicated |
|
||||
| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on |
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
```bash
|
||||
POST /v1/embeddings
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "nebius/Qwen/Qwen3-Embedding-8B",
|
||||
"input": "The food was delicious"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: Nebius, OpenAI, Mistral, Together AI, Fireworks, NVIDIA.
|
||||
|
||||
```bash
|
||||
# List all embedding models
|
||||
GET /v1/embeddings
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Image Generation
|
||||
|
||||
```bash
|
||||
POST /v1/images/generations
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "openai/dall-e-3",
|
||||
"prompt": "A beautiful sunset over mountains",
|
||||
"size": "1024x1024"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: OpenAI (DALL-E), xAI (Grok Image), Together AI (FLUX), Fireworks AI.
|
||||
|
||||
```bash
|
||||
# List all image models
|
||||
GET /v1/images/generations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## List Models
|
||||
|
||||
```bash
|
||||
GET /v1/models
|
||||
Authorization: Bearer your-api-key
|
||||
|
||||
→ Returns all chat, embedding, and image models + combos in OpenAI format
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Compatibility Endpoints
|
||||
|
||||
| Method | Path | Format |
|
||||
| ------ | --------------------------- | ---------------------- |
|
||||
| POST | `/v1/chat/completions` | OpenAI |
|
||||
| POST | `/v1/messages` | Anthropic |
|
||||
| POST | `/v1/responses` | OpenAI Responses |
|
||||
| POST | `/v1/embeddings` | OpenAI |
|
||||
| POST | `/v1/images/generations` | OpenAI |
|
||||
| GET | `/v1/models` | OpenAI |
|
||||
| POST | `/v1/messages/count_tokens` | Anthropic |
|
||||
| GET | `/v1beta/models` | Gemini |
|
||||
| POST | `/v1beta/models/{...path}` | Gemini generateContent |
|
||||
| POST | `/v1/api/chat` | Ollama |
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
```bash
|
||||
POST /v1/providers/{provider}/chat/completions
|
||||
POST /v1/providers/{provider}/embeddings
|
||||
POST /v1/providers/{provider}/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
---
|
||||
|
||||
## Semantic Cache
|
||||
|
||||
```bash
|
||||
# Get cache stats
|
||||
GET /api/cache
|
||||
|
||||
# Clear all caches
|
||||
DELETE /api/cache
|
||||
```
|
||||
|
||||
Response example:
|
||||
|
||||
```json
|
||||
{
|
||||
"semanticCache": {
|
||||
"memorySize": 42,
|
||||
"memoryMaxSize": 500,
|
||||
"dbSize": 128,
|
||||
"hitRate": 0.65
|
||||
},
|
||||
"idempotency": {
|
||||
"activeKeys": 3,
|
||||
"windowMs": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dashboard & Management
|
||||
|
||||
### Authentication
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------------- | ------- | --------------------- |
|
||||
| `/api/auth/login` | POST | Login |
|
||||
| `/api/auth/logout` | POST | Logout |
|
||||
| `/api/settings/require-login` | GET/PUT | Toggle login required |
|
||||
|
||||
### Provider Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------- | --------------- | ------------------------ |
|
||||
| `/api/providers` | GET/POST | List / create providers |
|
||||
| `/api/providers/[id]` | GET/PUT/DELETE | Manage a provider |
|
||||
| `/api/providers/[id]/test` | POST | Test provider connection |
|
||||
| `/api/providers/[id]/models` | GET | List provider models |
|
||||
| `/api/providers/validate` | POST | Validate provider config |
|
||||
| `/api/provider-nodes*` | Various | Provider node management |
|
||||
| `/api/provider-models` | GET/POST/DELETE | Custom models |
|
||||
|
||||
### OAuth Flows
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------------- | ------- | ----------------------- |
|
||||
| `/api/oauth/[provider]/[action]` | Various | Provider-specific OAuth |
|
||||
|
||||
### Routing & Config
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------- | -------- | ----------------------------- |
|
||||
| `/api/models/alias` | GET/POST | Model aliases |
|
||||
| `/api/models/catalog` | GET | All models by provider + type |
|
||||
| `/api/combos*` | Various | Combo management |
|
||||
| `/api/keys*` | Various | API key management |
|
||||
| `/api/pricing` | GET | Model pricing |
|
||||
|
||||
### Usage & Analytics
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | -------------------- |
|
||||
| `/api/usage/history` | GET | Usage history |
|
||||
| `/api/usage/logs` | GET | Usage logs |
|
||||
| `/api/usage/request-logs` | GET | Request-level logs |
|
||||
| `/api/usage/[connectionId]` | GET | Per-connection usage |
|
||||
|
||||
### Settings
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------------- | ------- | ---------------------- |
|
||||
| `/api/settings` | GET/PUT | General settings |
|
||||
| `/api/settings/proxy` | GET/PUT | Network proxy config |
|
||||
| `/api/settings/proxy/test` | POST | Test proxy connection |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt |
|
||||
|
||||
### Monitoring
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------ | ---------- | ----------------------- |
|
||||
| `/api/sessions` | GET | Active session tracking |
|
||||
| `/api/rate-limits` | GET | Per-account rate limits |
|
||||
| `/api/monitoring/health` | GET | Health check |
|
||||
| `/api/cache` | GET/DELETE | Cache stats / clear |
|
||||
|
||||
### Backup & Export/Import
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | --------------------------------------- |
|
||||
| `/api/db-backups` | GET | List available backups |
|
||||
| `/api/db-backups` | PUT | Create a manual backup |
|
||||
| `/api/db-backups` | POST | Restore from a specific backup |
|
||||
| `/api/db-backups/export` | GET | Download database as .sqlite file |
|
||||
| `/api/db-backups/import` | POST | Upload .sqlite file to replace database |
|
||||
| `/api/db-backups/exportAll` | GET | Download full backup as .tar.gz archive |
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------- | ------- | --------------------- |
|
||||
| `/api/sync/cloud` | Various | Cloud sync operations |
|
||||
| `/api/sync/initialize` | POST | Initialize sync |
|
||||
| `/api/cloud/*` | Various | Cloud management |
|
||||
|
||||
### CLI Tools
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------------- | ------ | ------------------- |
|
||||
| `/api/cli-tools/claude-settings` | GET | Claude CLI status |
|
||||
| `/api/cli-tools/codex-settings` | GET | Codex CLI status |
|
||||
| `/api/cli-tools/droid-settings` | GET | Droid CLI status |
|
||||
| `/api/cli-tools/openclaw-settings` | GET | OpenClaw CLI status |
|
||||
| `/api/cli-tools/runtime/[toolId]` | GET | Generic CLI runtime |
|
||||
|
||||
CLI responses include: `installed`, `runnable`, `command`, `commandPath`, `runtimeMode`, `reason`.
|
||||
|
||||
### ACP Agents
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------- | ------ | -------------------------------------------------------- |
|
||||
| `/api/acp/agents` | GET | List all detected agents (built-in + custom) with status |
|
||||
| `/api/acp/agents` | POST | Add custom agent or refresh detection cache |
|
||||
| `/api/acp/agents` | DELETE | Remove a custom agent by `id` query param |
|
||||
|
||||
GET response includes `agents[]` (id, name, binary, version, installed, protocol, isCustom) and `summary` (total, installed, notFound, builtIn, custom).
|
||||
|
||||
### Resilience & Rate Limits
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------- | ------- | ------------------------------- |
|
||||
| `/api/resilience` | GET/PUT | Get/update resilience profiles |
|
||||
| `/api/resilience/reset` | POST | Reset circuit breakers |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
| `/api/rate-limit` | GET | Global rate limit configuration |
|
||||
|
||||
### Evals
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------ | -------- | --------------------------------- |
|
||||
| `/api/evals` | GET/POST | List eval suites / run evaluation |
|
||||
|
||||
### Policies
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | --------------- | ----------------------- |
|
||||
| `/api/policies` | GET/POST/DELETE | Manage routing policies |
|
||||
|
||||
### Compliance
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | ----------------------------- |
|
||||
| `/api/compliance/audit-log` | GET | Compliance audit log (last N) |
|
||||
|
||||
### v1beta (Gemini-Compatible)
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------- | ------ | --------------------------------- |
|
||||
| `/v1beta/models` | GET | List models in Gemini format |
|
||||
| `/v1beta/models/{...path}` | POST | Gemini `generateContent` endpoint |
|
||||
|
||||
These endpoints mirror Gemini's API format for clients that expect native Gemini SDK compatibility.
|
||||
|
||||
### Internal / System APIs
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | ------ | ---------------------------------------------------- |
|
||||
| `/api/init` | GET | Application initialization check (used on first run) |
|
||||
| `/api/tags` | GET | Ollama-compatible model tags (for Ollama clients) |
|
||||
| `/api/restart` | POST | Trigger graceful server restart |
|
||||
| `/api/shutdown` | POST | Trigger graceful server shutdown |
|
||||
|
||||
> **Note:** These endpoints are used internally by the system or for Ollama client compatibility. They are not typically called by end users.
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
```
|
||||
|
||||
Transcribe audio files using Deepgram or AssemblyAI.
|
||||
|
||||
**Request:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@recording.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "Hello, this is the transcribed audio content.",
|
||||
"task": "transcribe",
|
||||
"language": "en",
|
||||
"duration": 12.5
|
||||
}
|
||||
```
|
||||
|
||||
**Supported providers:** `deepgram/nova-3`, `assemblyai/best`.
|
||||
|
||||
**Supported formats:** `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
## Ollama Compatibility
|
||||
|
||||
For clients that use Ollama's API format:
|
||||
|
||||
```bash
|
||||
# Chat endpoint (Ollama format)
|
||||
POST /v1/api/chat
|
||||
|
||||
# Model listing (Ollama format)
|
||||
GET /api/tags
|
||||
```
|
||||
|
||||
Requests are automatically translated between Ollama and internal formats.
|
||||
|
||||
---
|
||||
|
||||
## Telemetry
|
||||
|
||||
```bash
|
||||
# Get latency telemetry summary (p50/p95/p99 per provider)
|
||||
GET /api/telemetry/summary
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"claudeCode": { "p50": 245, "p95": 890, "p99": 1200, "count": 150 },
|
||||
"github": { "p50": 180, "p95": 620, "p99": 950, "count": 320 }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Budget
|
||||
|
||||
```bash
|
||||
# Get budget status for all API keys
|
||||
GET /api/usage/budget
|
||||
|
||||
# Set or update a budget
|
||||
POST /api/usage/budget
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"keyId": "key-123",
|
||||
"limit": 50.00,
|
||||
"period": "monthly"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Availability
|
||||
|
||||
```bash
|
||||
# Get real-time model availability across all providers
|
||||
GET /api/models/availability
|
||||
|
||||
# Check availability for a specific model
|
||||
POST /api/models/availability
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-5-20250929"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Request Processing
|
||||
|
||||
1. Client sends request to `/v1/*`
|
||||
2. Route handler calls `handleChat`, `handleEmbedding`, `handleAudioTranscription`, or `handleImageGeneration`
|
||||
3. Model is resolved (direct provider/model or alias/combo)
|
||||
4. Credentials selected from local DB with account availability filtering
|
||||
5. For chat: `handleChatCore` — format detection, translation, cache check, idempotency check
|
||||
6. Provider executor sends upstream request
|
||||
7. Response translated back to client format (chat) or returned as-is (embeddings/images/audio)
|
||||
8. Usage/logging recorded
|
||||
9. Fallback applies on errors according to combo rules
|
||||
|
||||
Full architecture reference: [`ARCHITECTURE.md`](ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
- Dashboard routes (`/dashboard/*`) use `auth_token` cookie
|
||||
- Login uses saved password hash; fallback to `INITIAL_PASSWORD`
|
||||
- `requireLogin` toggleable via `/api/settings/require-login`
|
||||
- `/v1/*` routes optionally require Bearer API key when `REQUIRE_API_KEY=true`
|
||||
@@ -0,0 +1,787 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/ARCHITECTURE.md) · 🇪🇸 [es](../es/ARCHITECTURE.md) · 🇫🇷 [fr](../fr/ARCHITECTURE.md) · 🇩🇪 [de](../de/ARCHITECTURE.md) · 🇮🇹 [it](../it/ARCHITECTURE.md) · 🇷🇺 [ru](../ru/ARCHITECTURE.md) · 🇨🇳 [zh-CN](../zh-CN/ARCHITECTURE.md) · 🇯🇵 [ja](../ja/ARCHITECTURE.md) · 🇰🇷 [ko](../ko/ARCHITECTURE.md) · 🇸🇦 [ar](../ar/ARCHITECTURE.md) · 🇮🇳 [in](../in/ARCHITECTURE.md) · 🇹🇭 [th](../th/ARCHITECTURE.md) · 🇻🇳 [vi](../vi/ARCHITECTURE.md) · 🇮🇩 [id](../id/ARCHITECTURE.md) · 🇲🇾 [ms](../ms/ARCHITECTURE.md) · 🇳🇱 [nl](../nl/ARCHITECTURE.md) · 🇵🇱 [pl](../pl/ARCHITECTURE.md) · 🇸🇪 [sv](../sv/ARCHITECTURE.md) · 🇳🇴 [no](../no/ARCHITECTURE.md) · 🇩🇰 [da](../da/ARCHITECTURE.md) · 🇫🇮 [fi](../fi/ARCHITECTURE.md) · 🇵🇹 [pt](../pt/ARCHITECTURE.md) · 🇷🇴 [ro](../ro/ARCHITECTURE.md) · 🇭🇺 [hu](../hu/ARCHITECTURE.md) · 🇧🇬 [bg](../bg/ARCHITECTURE.md) · 🇸🇰 [sk](../sk/ARCHITECTURE.md) · 🇺🇦 [uk-UA](../uk-UA/ARCHITECTURE.md) · 🇮🇱 [he](../he/ARCHITECTURE.md) · 🇵🇭 [phi](../phi/ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Architecture
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](ARCHITECTURE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/ARCHITECTURE.md) | 🇪🇸 [Español](i18n/es/ARCHITECTURE.md) | 🇫🇷 [Français](i18n/fr/ARCHITECTURE.md) | 🇮🇹 [Italiano](i18n/it/ARCHITECTURE.md) | 🇷🇺 [Русский](i18n/ru/ARCHITECTURE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/ARCHITECTURE.md) | 🇩🇪 [Deutsch](i18n/de/ARCHITECTURE.md) | 🇮🇳 [हिन्दी](i18n/in/ARCHITECTURE.md) | 🇹🇭 [ไทย](i18n/th/ARCHITECTURE.md) | 🇺🇦 [Українська](i18n/uk-UA/ARCHITECTURE.md) | 🇸🇦 [العربية](i18n/ar/ARCHITECTURE.md) | 🇯🇵 [日本語](i18n/ja/ARCHITECTURE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/ARCHITECTURE.md) | 🇧🇬 [Български](i18n/bg/ARCHITECTURE.md) | 🇩🇰 [Dansk](i18n/da/ARCHITECTURE.md) | 🇫🇮 [Suomi](i18n/fi/ARCHITECTURE.md) | 🇮🇱 [עברית](i18n/he/ARCHITECTURE.md) | 🇭🇺 [Magyar](i18n/hu/ARCHITECTURE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/ARCHITECTURE.md) | 🇰🇷 [한국어](i18n/ko/ARCHITECTURE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/ARCHITECTURE.md) | 🇳🇱 [Nederlands](i18n/nl/ARCHITECTURE.md) | 🇳🇴 [Norsk](i18n/no/ARCHITECTURE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/ARCHITECTURE.md) | 🇷🇴 [Română](i18n/ro/ARCHITECTURE.md) | 🇵🇱 [Polski](i18n/pl/ARCHITECTURE.md) | 🇸🇰 [Slovenčina](i18n/sk/ARCHITECTURE.md) | 🇸🇪 [Svenska](i18n/sv/ARCHITECTURE.md) | 🇵🇭 [Filipino](i18n/phi/ARCHITECTURE.md)
|
||||
|
||||
_Last updated: 2026-03-04_
|
||||
|
||||
## Executive Summary
|
||||
|
||||
OmniRoute is a local AI routing gateway and dashboard built on Next.js.
|
||||
It provides a single OpenAI-compatible endpoint (`/v1/*`) and routes traffic across multiple upstream providers with translation, fallback, token refresh, and usage tracking.
|
||||
|
||||
Core capabilities:
|
||||
|
||||
- OpenAI-compatible API surface for CLI/tools (28 providers)
|
||||
- Request/response translation across provider formats
|
||||
- Model combo fallback (multi-model sequence)
|
||||
- Account-level fallback (multi-account per provider)
|
||||
- OAuth + API-key provider connection management
|
||||
- Embedding generation via `/v1/embeddings` (6 providers, 9 models)
|
||||
- Image generation via `/v1/images/generations` (4 providers, 9 models)
|
||||
- Think tag parsing (`<think>...</think>`) for reasoning models
|
||||
- Response sanitization for strict OpenAI SDK compatibility
|
||||
- Role normalization (developer→system, system→user) for cross-provider compatibility
|
||||
- Structured output conversion (json_schema → Gemini responseSchema)
|
||||
- Local persistence for providers, keys, aliases, combos, settings, pricing
|
||||
- Usage/cost tracking and request logging
|
||||
- Optional cloud sync for multi-device/state sync
|
||||
- IP allowlist/blocklist for API access control
|
||||
- Thinking budget management (passthrough/auto/custom/adaptive)
|
||||
- Global system prompt injection
|
||||
- Session tracking and fingerprinting
|
||||
- Per-account enhanced rate limiting with provider-specific profiles
|
||||
- Circuit breaker pattern for provider resilience
|
||||
- Anti-thundering herd protection with mutex locking
|
||||
- Signature-based request deduplication cache
|
||||
- Domain layer: model availability, cost rules, fallback policy, lockout policy
|
||||
- Domain state persistence (SQLite write-through cache for fallbacks, budgets, lockouts, circuit breakers)
|
||||
- Policy engine for centralized request evaluation (lockout → budget → fallback)
|
||||
- Request telemetry with p50/p95/p99 latency aggregation
|
||||
- Correlation ID (X-Request-Id) for end-to-end tracing
|
||||
- Compliance audit logging with opt-out per API key
|
||||
- Eval framework for LLM quality assurance
|
||||
- Resilience UI dashboard with real-time circuit breaker status
|
||||
- Modular OAuth providers (12 individual modules under `src/lib/oauth/providers/`)
|
||||
|
||||
Primary runtime model:
|
||||
|
||||
- Next.js app routes under `src/app/api/*` implement both dashboard APIs and compatibility APIs
|
||||
- A shared SSE/routing core in `src/sse/*` + `open-sse/*` handles provider execution, translation, streaming, fallback, and usage
|
||||
|
||||
## Scope and Boundaries
|
||||
|
||||
### In Scope
|
||||
|
||||
- Local gateway runtime
|
||||
- Dashboard management APIs
|
||||
- Provider authentication and token refresh
|
||||
- Request translation and SSE streaming
|
||||
- Local state + usage persistence
|
||||
- Optional cloud sync orchestration
|
||||
|
||||
### Out of Scope
|
||||
|
||||
- Cloud service implementation behind `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Provider SLA/control plane outside local process
|
||||
- External CLI binaries themselves (Claude CLI, Codex CLI, etc.)
|
||||
|
||||
## High-Level System Context
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Clients[Developer Clients]
|
||||
C1[Claude Code]
|
||||
C2[Codex CLI]
|
||||
C3[OpenClaw / Droid / Cline / Continue / Roo]
|
||||
C4[Custom OpenAI-compatible clients]
|
||||
BROWSER[Browser Dashboard]
|
||||
end
|
||||
|
||||
subgraph Router[OmniRoute Local Process]
|
||||
API[V1 Compatibility API\n/v1/*]
|
||||
DASH[Dashboard + Management API\n/api/*]
|
||||
CORE[SSE + Translation Core\nopen-sse + src/sse]
|
||||
DB[(storage.sqlite)]
|
||||
UDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph Upstreams[Upstream Providers]
|
||||
P1[OAuth Providers\nClaude/Codex/Gemini/Qwen/iFlow/GitHub/Kiro/Cursor/Antigravity]
|
||||
P2[API Key Providers\nOpenAI/Anthropic/OpenRouter/GLM/Kimi/MiniMax\nDeepSeek/Groq/xAI/Mistral/Perplexity\nTogether/Fireworks/Cerebras/Cohere/NVIDIA]
|
||||
P3[Compatible Nodes\nOpenAI-compatible / Anthropic-compatible]
|
||||
end
|
||||
|
||||
subgraph Cloud[Optional Cloud Sync]
|
||||
CLOUD[Cloud Sync Endpoint\nNEXT_PUBLIC_CLOUD_URL]
|
||||
end
|
||||
|
||||
C1 --> API
|
||||
C2 --> API
|
||||
C3 --> API
|
||||
C4 --> API
|
||||
BROWSER --> DASH
|
||||
|
||||
API --> CORE
|
||||
DASH --> DB
|
||||
CORE --> DB
|
||||
CORE --> UDB
|
||||
|
||||
CORE --> P1
|
||||
CORE --> P2
|
||||
CORE --> P3
|
||||
|
||||
DASH --> CLOUD
|
||||
```
|
||||
|
||||
## Core Runtime Components
|
||||
|
||||
## 1) API and Routing Layer (Next.js App Routes)
|
||||
|
||||
Main directories:
|
||||
|
||||
- `src/app/api/v1/*` and `src/app/api/v1beta/*` for compatibility APIs
|
||||
- `src/app/api/*` for management/configuration APIs
|
||||
- Next rewrites in `next.config.mjs` map `/v1/*` to `/api/v1/*`
|
||||
|
||||
Important compatibility routes:
|
||||
|
||||
- `src/app/api/v1/chat/completions/route.ts`
|
||||
- `src/app/api/v1/messages/route.ts`
|
||||
- `src/app/api/v1/responses/route.ts`
|
||||
- `src/app/api/v1/models/route.ts` — includes custom models with `custom: true`
|
||||
- `src/app/api/v1/embeddings/route.ts` — embedding generation (6 providers)
|
||||
- `src/app/api/v1/images/generations/route.ts` — image generation (4+ providers incl. Antigravity/Nebius)
|
||||
- `src/app/api/v1/messages/count_tokens/route.ts`
|
||||
- `src/app/api/v1/providers/[provider]/chat/completions/route.ts` — dedicated per-provider chat
|
||||
- `src/app/api/v1/providers/[provider]/embeddings/route.ts` — dedicated per-provider embeddings
|
||||
- `src/app/api/v1/providers/[provider]/images/generations/route.ts` — dedicated per-provider images
|
||||
- `src/app/api/v1beta/models/route.ts`
|
||||
- `src/app/api/v1beta/models/[...path]/route.ts`
|
||||
|
||||
Management domains:
|
||||
|
||||
- Auth/settings: `src/app/api/auth/*`, `src/app/api/settings/*`
|
||||
- Providers/connections: `src/app/api/providers*`
|
||||
- Provider nodes: `src/app/api/provider-nodes*`
|
||||
- Custom models: `src/app/api/provider-models` (GET/POST/DELETE)
|
||||
- Model catalog: `src/app/api/models/route.ts` (GET)
|
||||
- Proxy config: `src/app/api/settings/proxy` (GET/PUT/DELETE) + `src/app/api/settings/proxy/test` (POST)
|
||||
- OAuth: `src/app/api/oauth/*`
|
||||
- Keys/aliases/combos/pricing: `src/app/api/keys*`, `src/app/api/models/alias`, `src/app/api/combos*`, `src/app/api/pricing`
|
||||
- Usage: `src/app/api/usage/*`
|
||||
- Sync/cloud: `src/app/api/sync/*`, `src/app/api/cloud/*`
|
||||
- CLI tooling helpers: `src/app/api/cli-tools/*`
|
||||
- IP filter: `src/app/api/settings/ip-filter` (GET/PUT)
|
||||
- Thinking budget: `src/app/api/settings/thinking-budget` (GET/PUT)
|
||||
- System prompt: `src/app/api/settings/system-prompt` (GET/PUT)
|
||||
- Sessions: `src/app/api/sessions` (GET)
|
||||
- Rate limits: `src/app/api/rate-limits` (GET)
|
||||
- Resilience: `src/app/api/resilience` (GET/PATCH) — provider profiles, circuit breaker, rate limit state
|
||||
- Resilience reset: `src/app/api/resilience/reset` (POST) — reset breakers + cooldowns
|
||||
- Cache stats: `src/app/api/cache/stats` (GET/DELETE)
|
||||
- Model availability: `src/app/api/models/availability` (GET/POST)
|
||||
- Telemetry: `src/app/api/telemetry/summary` (GET)
|
||||
- Budget: `src/app/api/usage/budget` (GET/POST)
|
||||
- Fallback chains: `src/app/api/fallback/chains` (GET/POST/DELETE)
|
||||
- Compliance audit: `src/app/api/compliance/audit-log` (GET)
|
||||
- Evals: `src/app/api/evals` (GET/POST), `src/app/api/evals/[suiteId]` (GET)
|
||||
- Policies: `src/app/api/policies` (GET/POST)
|
||||
|
||||
## 2) SSE + Translation Core
|
||||
|
||||
Main flow modules:
|
||||
|
||||
- Entry: `src/sse/handlers/chat.ts`
|
||||
- Core orchestration: `open-sse/handlers/chatCore.ts`
|
||||
- Provider execution adapters: `open-sse/executors/*`
|
||||
- Format detection/provider config: `open-sse/services/provider.ts`
|
||||
- Model parse/resolve: `src/sse/services/model.ts`, `open-sse/services/model.ts`
|
||||
- Account fallback logic: `open-sse/services/accountFallback.ts`
|
||||
- Translation registry: `open-sse/translator/index.ts`
|
||||
- Stream transformations: `open-sse/utils/stream.ts`, `open-sse/utils/streamHandler.ts`
|
||||
- Usage extraction/normalization: `open-sse/utils/usageTracking.ts`
|
||||
- Think tag parser: `open-sse/utils/thinkTagParser.ts`
|
||||
- Embedding handler: `open-sse/handlers/embeddings.ts`
|
||||
- Embedding provider registry: `open-sse/config/embeddingRegistry.ts`
|
||||
- Image generation handler: `open-sse/handlers/imageGeneration.ts`
|
||||
- Image provider registry: `open-sse/config/imageRegistry.ts`
|
||||
- Response sanitization: `open-sse/handlers/responseSanitizer.ts`
|
||||
- Role normalization: `open-sse/services/roleNormalizer.ts`
|
||||
|
||||
Services (business logic):
|
||||
|
||||
- Account selection/scoring: `open-sse/services/accountSelector.ts`
|
||||
- Context lifecycle management: `open-sse/services/contextManager.ts`
|
||||
- IP filter enforcement: `open-sse/services/ipFilter.ts`
|
||||
- Session tracking: `open-sse/services/sessionManager.ts`
|
||||
- Request deduplication: `open-sse/services/signatureCache.ts`
|
||||
- System prompt injection: `open-sse/services/systemPrompt.ts`
|
||||
- Thinking budget management: `open-sse/services/thinkingBudget.ts`
|
||||
- Wildcard model routing: `open-sse/services/wildcardRouter.ts`
|
||||
- Rate limit management: `open-sse/services/rateLimitManager.ts`
|
||||
- Circuit breaker: `open-sse/services/circuitBreaker.ts`
|
||||
|
||||
Domain layer modules:
|
||||
|
||||
- Model availability: `src/lib/domain/modelAvailability.ts`
|
||||
- Cost rules/budgets: `src/lib/domain/costRules.ts`
|
||||
- Fallback policy: `src/lib/domain/fallbackPolicy.ts`
|
||||
- Combo resolver: `src/lib/domain/comboResolver.ts`
|
||||
- Lockout policy: `src/lib/domain/lockoutPolicy.ts`
|
||||
- Policy engine: `src/domain/policyEngine.ts` — centralized lockout → budget → fallback evaluation
|
||||
- Error codes catalog: `src/lib/domain/errorCodes.ts`
|
||||
- Request ID: `src/lib/domain/requestId.ts`
|
||||
- Fetch timeout: `src/lib/domain/fetchTimeout.ts`
|
||||
- Request telemetry: `src/lib/domain/requestTelemetry.ts`
|
||||
- Compliance/audit: `src/lib/domain/compliance/index.ts`
|
||||
- Eval runner: `src/lib/domain/evalRunner.ts`
|
||||
- Domain state persistence: `src/lib/db/domainState.ts` — SQLite CRUD for fallback chains, budgets, cost history, lockout state, circuit breakers
|
||||
|
||||
OAuth provider modules (12 individual files under `src/lib/oauth/providers/`):
|
||||
|
||||
- Registry index: `src/lib/oauth/providers/index.ts`
|
||||
- Individual providers: `claude.ts`, `codex.ts`, `gemini.ts`, `antigravity.ts`, `iflow.ts`, `qwen.ts`, `kimi-coding.ts`, `github.ts`, `kiro.ts`, `cursor.ts`, `kilocode.ts`, `cline.ts`
|
||||
- Thin wrapper: `src/lib/oauth/providers.ts` — re-exports from individual modules
|
||||
|
||||
## 3) Persistence Layer
|
||||
|
||||
Primary state DB (SQLite):
|
||||
|
||||
- Core infra: `src/lib/db/core.ts` (better-sqlite3, migrations, WAL)
|
||||
- Re-export facade: `src/lib/localDb.ts` (thin compatibility layer for callers)
|
||||
- file: `${DATA_DIR}/storage.sqlite` (or `$XDG_CONFIG_HOME/omniroute/storage.sqlite` when set, else `~/.omniroute/storage.sqlite`)
|
||||
- entities (tables + KV namespaces): providerConnections, providerNodes, modelAliases, combos, apiKeys, settings, pricing, **customModels**, **proxyConfig**, **ipFilter**, **thinkingBudget**, **systemPrompt**
|
||||
|
||||
Usage persistence:
|
||||
|
||||
- facade: `src/lib/usageDb.ts` (decomposed modules in `src/lib/usage/*`)
|
||||
- SQLite tables in `storage.sqlite`: `usage_history`, `call_logs`, `proxy_logs`
|
||||
- optional file artifacts remain for compatibility/debug (`${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`, `<repo>/logs/...`)
|
||||
- legacy JSON files are migrated to SQLite by startup migrations when present
|
||||
|
||||
Domain State DB (SQLite):
|
||||
|
||||
- `src/lib/db/domainState.ts` — CRUD operations for domain state
|
||||
- Tables (created in `src/lib/db/core.ts`): `domain_fallback_chains`, `domain_budgets`, `domain_cost_history`, `domain_lockout_state`, `domain_circuit_breakers`
|
||||
- Write-through cache pattern: in-memory Maps are authoritative at runtime; mutations are written synchronously to SQLite; state is restored from DB on cold start
|
||||
|
||||
## 4) Auth + Security Surfaces
|
||||
|
||||
- Dashboard cookie auth: `src/proxy.ts`, `src/app/api/auth/login/route.ts`
|
||||
- API key generation/verification: `src/shared/utils/apiKey.ts`
|
||||
- Provider secrets persisted in `providerConnections` entries
|
||||
- Outbound proxy support via `open-sse/utils/proxyFetch.ts` (env vars) and `open-sse/utils/networkProxy.ts` (configurable per-provider or global)
|
||||
|
||||
## 5) Cloud Sync
|
||||
|
||||
- Scheduler init: `src/lib/initCloudSync.ts`, `src/shared/services/initializeCloudSync.ts`
|
||||
- Periodic task: `src/shared/services/cloudSyncScheduler.ts`
|
||||
- Control route: `src/app/api/sync/cloud/route.ts`
|
||||
|
||||
## Request Lifecycle (`/v1/chat/completions`)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant Client as CLI/SDK Client
|
||||
participant Route as /api/v1/chat/completions
|
||||
participant Chat as src/sse/handlers/chat
|
||||
participant Core as open-sse/handlers/chatCore
|
||||
participant Model as Model Resolver
|
||||
participant Auth as Credential Selector
|
||||
participant Exec as Provider Executor
|
||||
participant Prov as Upstream Provider
|
||||
participant Stream as Stream Translator
|
||||
participant Usage as usageDb
|
||||
|
||||
Client->>Route: POST /v1/chat/completions
|
||||
Route->>Chat: handleChat(request)
|
||||
Chat->>Model: parse/resolve model or combo
|
||||
|
||||
alt Combo model
|
||||
Chat->>Chat: iterate combo models (handleComboChat)
|
||||
end
|
||||
|
||||
Chat->>Auth: getProviderCredentials(provider)
|
||||
Auth-->>Chat: active account + tokens/api key
|
||||
|
||||
Chat->>Core: handleChatCore(body, modelInfo, credentials)
|
||||
Core->>Core: detect source format
|
||||
Core->>Core: translate request to target format
|
||||
Core->>Exec: execute(provider, transformedBody)
|
||||
Exec->>Prov: upstream API call
|
||||
Prov-->>Exec: SSE/JSON response
|
||||
Exec-->>Core: response + metadata
|
||||
|
||||
alt 401/403
|
||||
Core->>Exec: refreshCredentials()
|
||||
Exec-->>Core: updated tokens
|
||||
Core->>Exec: retry request
|
||||
end
|
||||
|
||||
Core->>Stream: translate/normalize stream to client format
|
||||
Stream-->>Client: SSE chunks / JSON response
|
||||
|
||||
Stream->>Usage: extract usage + persist history/log
|
||||
```
|
||||
|
||||
## Combo + Account Fallback Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A[Incoming model string] --> B{Is combo name?}
|
||||
B -- Yes --> C[Load combo models sequence]
|
||||
B -- No --> D[Single model path]
|
||||
|
||||
C --> E[Try model N]
|
||||
E --> F[Resolve provider/model]
|
||||
D --> F
|
||||
|
||||
F --> G[Select account credentials]
|
||||
G --> H{Credentials available?}
|
||||
H -- No --> I[Return provider unavailable]
|
||||
H -- Yes --> J[Execute request]
|
||||
|
||||
J --> K{Success?}
|
||||
K -- Yes --> L[Return response]
|
||||
K -- No --> M{Fallback-eligible error?}
|
||||
|
||||
M -- No --> N[Return error]
|
||||
M -- Yes --> O[Mark account unavailable cooldown]
|
||||
O --> P{Another account for provider?}
|
||||
P -- Yes --> G
|
||||
P -- No --> Q{In combo with next model?}
|
||||
Q -- Yes --> E
|
||||
Q -- No --> R[Return all unavailable]
|
||||
```
|
||||
|
||||
Fallback decisions are driven by `open-sse/services/accountFallback.ts` using status codes and error-message heuristics.
|
||||
|
||||
## OAuth Onboarding and Token Refresh Lifecycle
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Dashboard UI
|
||||
participant OAuth as /api/oauth/[provider]/[action]
|
||||
participant ProvAuth as Provider Auth Server
|
||||
participant DB as localDb
|
||||
participant Test as /api/providers/[id]/test
|
||||
participant Exec as Provider Executor
|
||||
|
||||
UI->>OAuth: GET authorize or device-code
|
||||
OAuth->>ProvAuth: create auth/device flow
|
||||
ProvAuth-->>OAuth: auth URL or device code payload
|
||||
OAuth-->>UI: flow data
|
||||
|
||||
UI->>OAuth: POST exchange or poll
|
||||
OAuth->>ProvAuth: token exchange/poll
|
||||
ProvAuth-->>OAuth: access/refresh tokens
|
||||
OAuth->>DB: createProviderConnection(oauth data)
|
||||
OAuth-->>UI: success + connection id
|
||||
|
||||
UI->>Test: POST /api/providers/[id]/test
|
||||
Test->>Exec: validate credentials / optional refresh
|
||||
Exec-->>Test: valid or refreshed token info
|
||||
Test->>DB: update status/tokens/errors
|
||||
Test-->>UI: validation result
|
||||
```
|
||||
|
||||
Refresh during live traffic is executed inside `open-sse/handlers/chatCore.ts` via executor `refreshCredentials()`.
|
||||
|
||||
## Cloud Sync Lifecycle (Enable / Sync / Disable)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Endpoint Page UI
|
||||
participant Sync as /api/sync/cloud
|
||||
participant DB as localDb
|
||||
participant Cloud as External Cloud Sync
|
||||
participant Claude as ~/.claude/settings.json
|
||||
|
||||
UI->>Sync: POST action=enable
|
||||
Sync->>DB: set cloudEnabled=true
|
||||
Sync->>DB: ensure API key exists
|
||||
Sync->>Cloud: POST /sync/{machineId} (providers/aliases/combos/keys)
|
||||
Cloud-->>Sync: sync result
|
||||
Sync->>Cloud: GET /{machineId}/v1/verify
|
||||
Sync-->>UI: enabled + verification status
|
||||
|
||||
UI->>Sync: POST action=sync
|
||||
Sync->>Cloud: POST /sync/{machineId}
|
||||
Cloud-->>Sync: remote data
|
||||
Sync->>DB: update newer local tokens/status
|
||||
Sync-->>UI: synced
|
||||
|
||||
UI->>Sync: POST action=disable
|
||||
Sync->>DB: set cloudEnabled=false
|
||||
Sync->>Cloud: DELETE /sync/{machineId}
|
||||
Sync->>Claude: switch ANTHROPIC_BASE_URL back to local (if needed)
|
||||
Sync-->>UI: disabled
|
||||
```
|
||||
|
||||
Periodic sync is triggered by `CloudSyncScheduler` when cloud is enabled.
|
||||
|
||||
## Data Model and Storage Map
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
SETTINGS ||--o{ PROVIDER_CONNECTION : controls
|
||||
PROVIDER_NODE ||--o{ PROVIDER_CONNECTION : backs_compatible_provider
|
||||
PROVIDER_CONNECTION ||--o{ USAGE_ENTRY : emits_usage
|
||||
|
||||
SETTINGS {
|
||||
boolean cloudEnabled
|
||||
number stickyRoundRobinLimit
|
||||
boolean requireLogin
|
||||
string password_hash
|
||||
string fallbackStrategy
|
||||
json rateLimitDefaults
|
||||
json providerProfiles
|
||||
}
|
||||
|
||||
PROVIDER_CONNECTION {
|
||||
string id
|
||||
string provider
|
||||
string authType
|
||||
string name
|
||||
number priority
|
||||
boolean isActive
|
||||
string apiKey
|
||||
string accessToken
|
||||
string refreshToken
|
||||
string expiresAt
|
||||
string testStatus
|
||||
string lastError
|
||||
string rateLimitedUntil
|
||||
json providerSpecificData
|
||||
}
|
||||
|
||||
PROVIDER_NODE {
|
||||
string id
|
||||
string type
|
||||
string name
|
||||
string prefix
|
||||
string apiType
|
||||
string baseUrl
|
||||
}
|
||||
|
||||
MODEL_ALIAS {
|
||||
string alias
|
||||
string targetModel
|
||||
}
|
||||
|
||||
COMBO {
|
||||
string id
|
||||
string name
|
||||
string[] models
|
||||
}
|
||||
|
||||
API_KEY {
|
||||
string id
|
||||
string name
|
||||
string key
|
||||
string machineId
|
||||
}
|
||||
|
||||
USAGE_ENTRY {
|
||||
string provider
|
||||
string model
|
||||
number prompt_tokens
|
||||
number completion_tokens
|
||||
string connectionId
|
||||
string timestamp
|
||||
}
|
||||
|
||||
CUSTOM_MODEL {
|
||||
string id
|
||||
string name
|
||||
string providerId
|
||||
}
|
||||
|
||||
PROXY_CONFIG {
|
||||
string global
|
||||
json providers
|
||||
}
|
||||
|
||||
IP_FILTER {
|
||||
string mode
|
||||
string[] allowlist
|
||||
string[] blocklist
|
||||
}
|
||||
|
||||
THINKING_BUDGET {
|
||||
string mode
|
||||
number customBudget
|
||||
string effortLevel
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT {
|
||||
boolean enabled
|
||||
string prompt
|
||||
string position
|
||||
}
|
||||
```
|
||||
|
||||
Physical storage files:
|
||||
|
||||
- primary runtime DB: `${DATA_DIR}/storage.sqlite`
|
||||
- request log lines: `${DATA_DIR}/log.txt` (compat/debug artifact)
|
||||
- structured call payload archives: `${DATA_DIR}/call_logs/`
|
||||
- optional translator/request debug sessions: `<repo>/logs/...`
|
||||
|
||||
## Deployment Topology
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph LocalHost[Developer Host]
|
||||
CLI[CLI Tools]
|
||||
Browser[Dashboard Browser]
|
||||
end
|
||||
|
||||
subgraph ContainerOrProcess[OmniRoute Runtime]
|
||||
Next[Next.js Server\nPORT=20128]
|
||||
Core[SSE Core + Executors]
|
||||
MainDB[(storage.sqlite)]
|
||||
UsageDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph External[External Services]
|
||||
Providers[AI Providers]
|
||||
SyncCloud[Cloud Sync Service]
|
||||
end
|
||||
|
||||
CLI --> Next
|
||||
Browser --> Next
|
||||
Next --> Core
|
||||
Next --> MainDB
|
||||
Core --> MainDB
|
||||
Core --> UsageDB
|
||||
Core --> Providers
|
||||
Next --> SyncCloud
|
||||
```
|
||||
|
||||
## Module Mapping (Decision-Critical)
|
||||
|
||||
### Route and API Modules
|
||||
|
||||
- `src/app/api/v1/*`, `src/app/api/v1beta/*`: compatibility APIs
|
||||
- `src/app/api/v1/providers/[provider]/*`: dedicated per-provider routes (chat, embeddings, images)
|
||||
- `src/app/api/providers*`: provider CRUD, validation, testing
|
||||
- `src/app/api/provider-nodes*`: custom compatible node management
|
||||
- `src/app/api/provider-models`: custom model management (CRUD)
|
||||
- `src/app/api/models/route.ts`: model catalog API (aliases + custom models)
|
||||
- `src/app/api/oauth/*`: OAuth/device-code flows
|
||||
- `src/app/api/keys*`: local API key lifecycle
|
||||
- `src/app/api/models/alias`: alias management
|
||||
- `src/app/api/combos*`: fallback combo management
|
||||
- `src/app/api/pricing`: pricing overrides for cost calculation
|
||||
- `src/app/api/settings/proxy`: proxy configuration (GET/PUT/DELETE)
|
||||
- `src/app/api/settings/proxy/test`: outbound proxy connectivity test (POST)
|
||||
- `src/app/api/usage/*`: usage and logs APIs
|
||||
- `src/app/api/sync/*` + `src/app/api/cloud/*`: cloud sync and cloud-facing helpers
|
||||
- `src/app/api/cli-tools/*`: local CLI config writers/checkers
|
||||
- `src/app/api/settings/ip-filter`: IP allowlist/blocklist (GET/PUT)
|
||||
- `src/app/api/settings/thinking-budget`: thinking token budget config (GET/PUT)
|
||||
- `src/app/api/settings/system-prompt`: global system prompt (GET/PUT)
|
||||
- `src/app/api/sessions`: active session listing (GET)
|
||||
- `src/app/api/rate-limits`: per-account rate limit status (GET)
|
||||
|
||||
### Routing and Execution Core
|
||||
|
||||
- `src/sse/handlers/chat.ts`: request parse, combo handling, account selection loop
|
||||
- `open-sse/handlers/chatCore.ts`: translation, executor dispatch, retry/refresh handling, stream setup
|
||||
- `open-sse/executors/*`: provider-specific network and format behavior
|
||||
|
||||
### Translation Registry and Format Converters
|
||||
|
||||
- `open-sse/translator/index.ts`: translator registry and orchestration
|
||||
- Request translators: `open-sse/translator/request/*`
|
||||
- Response translators: `open-sse/translator/response/*`
|
||||
- Format constants: `open-sse/translator/formats.ts`
|
||||
|
||||
### Persistence
|
||||
|
||||
- `src/lib/db/*`: persistent config/state and domain persistence on SQLite
|
||||
- `src/lib/localDb.ts`: compatibility re-export for DB modules
|
||||
- `src/lib/usageDb.ts`: usage history/call logs facade on top of SQLite tables
|
||||
|
||||
## Provider Executor Coverage (Strategy Pattern)
|
||||
|
||||
Each provider has a specialized executor extending `BaseExecutor` (in `open-sse/executors/base.ts`), which provides URL building, header construction, retry with exponential backoff, credential refresh hooks, and the `execute()` orchestration method.
|
||||
|
||||
| Executor | Provider(s) | Special Handling |
|
||||
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------- |
|
||||
| `DefaultExecutor` | OpenAI, Claude, Gemini, Qwen, iFlow, OpenRouter, GLM, Kimi, MiniMax, DeepSeek, Groq, xAI, Mistral, Perplexity, Together, Fireworks, Cerebras, Cohere, NVIDIA | Dynamic URL/header config per provider |
|
||||
| `AntigravityExecutor` | Google Antigravity | Custom project/session IDs, Retry-After parsing |
|
||||
| `CodexExecutor` | OpenAI Codex | Injects system instructions, forces reasoning effort |
|
||||
| `CursorExecutor` | Cursor IDE | ConnectRPC protocol, Protobuf encoding, request signing via checksum |
|
||||
| `GithubExecutor` | GitHub Copilot | Copilot token refresh, VSCode-mimicking headers |
|
||||
| `KiroExecutor` | AWS CodeWhisperer/Kiro | AWS EventStream binary format → SSE conversion |
|
||||
| `GeminiCLIExecutor` | Gemini CLI | Google OAuth token refresh cycle |
|
||||
|
||||
All other providers (including custom compatible nodes) use the `DefaultExecutor`.
|
||||
|
||||
## Provider Compatibility Matrix
|
||||
|
||||
| Provider | Format | Auth | Stream | Non-Stream | Token Refresh | Usage API |
|
||||
| ---------------- | ---------------- | --------------------- | ---------------- | ---------- | ------------- | ------------------ |
|
||||
| Claude | claude | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Admin only |
|
||||
| Gemini | gemini | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Gemini CLI | gemini-cli | OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Antigravity | antigravity | OAuth | ✅ | ✅ | ✅ | ✅ Full quota API |
|
||||
| OpenAI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Codex | openai-responses | OAuth | ✅ forced | ❌ | ✅ | ✅ Rate limits |
|
||||
| GitHub Copilot | openai | OAuth + Copilot Token | ✅ | ✅ | ✅ | ✅ Quota snapshots |
|
||||
| Cursor | cursor | Custom checksum | ✅ | ✅ | ❌ | ❌ |
|
||||
| Kiro | kiro | AWS SSO OIDC | ✅ (EventStream) | ❌ | ✅ | ✅ Usage limits |
|
||||
| Qwen | openai | OAuth | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| iFlow | openai | OAuth (Basic) | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| OpenRouter | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| GLM/Kimi/MiniMax | claude | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| DeepSeek | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Groq | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| xAI (Grok) | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Mistral | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Perplexity | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Together AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Fireworks AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cerebras | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cohere | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| NVIDIA NIM | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
|
||||
## Format Translation Coverage
|
||||
|
||||
Detected source formats include:
|
||||
|
||||
- `openai`
|
||||
- `openai-responses`
|
||||
- `claude`
|
||||
- `gemini`
|
||||
|
||||
Target formats include:
|
||||
|
||||
- OpenAI chat/Responses
|
||||
- Claude
|
||||
- Gemini/Gemini-CLI/Antigravity envelope
|
||||
- Kiro
|
||||
- Cursor
|
||||
|
||||
Translations use **OpenAI as the hub format** — all conversions go through OpenAI as intermediate:
|
||||
|
||||
```
|
||||
Source Format → OpenAI (hub) → Target Format
|
||||
```
|
||||
|
||||
Translations are selected dynamically based on source payload shape and provider target format.
|
||||
|
||||
Additional processing layers in the translation pipeline:
|
||||
|
||||
- **Response sanitization** — Strips non-standard fields from OpenAI-format responses (both streaming and non-streaming) to ensure strict SDK compliance
|
||||
- **Role normalization** — Converts `developer` → `system` for non-OpenAI targets; merges `system` → `user` for models that reject the system role (GLM, ERNIE)
|
||||
- **Think tag extraction** — Parses `<think>...</think>` blocks from content into `reasoning_content` field
|
||||
- **Structured output** — Converts OpenAI `response_format.json_schema` to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
## Supported API Endpoints
|
||||
|
||||
| Endpoint | Format | Handler |
|
||||
| -------------------------------------------------- | ------------------ | ---------------------------------------------------- |
|
||||
| `POST /v1/chat/completions` | OpenAI Chat | `src/sse/handlers/chat.ts` |
|
||||
| `POST /v1/messages` | Claude Messages | Same handler (auto-detected) |
|
||||
| `POST /v1/responses` | OpenAI Responses | `open-sse/handlers/responsesHandler.ts` |
|
||||
| `POST /v1/embeddings` | OpenAI Embeddings | `open-sse/handlers/embeddings.ts` |
|
||||
| `GET /v1/embeddings` | Model listing | API route |
|
||||
| `POST /v1/images/generations` | OpenAI Images | `open-sse/handlers/imageGeneration.ts` |
|
||||
| `GET /v1/images/generations` | Model listing | API route |
|
||||
| `POST /v1/providers/{provider}/chat/completions` | OpenAI Chat | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/embeddings` | OpenAI Embeddings | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/images/generations` | OpenAI Images | Dedicated per-provider with model validation |
|
||||
| `POST /v1/messages/count_tokens` | Claude Token Count | API route |
|
||||
| `GET /v1/models` | OpenAI Models list | API route (chat + embedding + image + custom models) |
|
||||
| `GET /api/models/catalog` | Catalog | All models grouped by provider + type |
|
||||
| `POST /v1beta/models/*:streamGenerateContent` | Gemini native | API route |
|
||||
| `GET/PUT/DELETE /api/settings/proxy` | Proxy Config | Network proxy configuration |
|
||||
| `POST /api/settings/proxy/test` | Proxy Connectivity | Proxy health/connectivity test endpoint |
|
||||
| `GET/POST/DELETE /api/provider-models` | Custom Models | Custom model management per provider |
|
||||
|
||||
## Bypass Handler
|
||||
|
||||
The bypass handler (`open-sse/utils/bypassHandler.ts`) intercepts known "throwaway" requests from Claude CLI — warmup pings, title extractions, and token counts — and returns a **fake response** without consuming upstream provider tokens. This is triggered only when `User-Agent` contains `claude-cli`.
|
||||
|
||||
## Request Logger Pipeline
|
||||
|
||||
The request logger (`open-sse/utils/requestLogger.ts`) provides a 7-stage debug logging pipeline, disabled by default, enabled via `ENABLE_REQUEST_LOGS=true`:
|
||||
|
||||
```
|
||||
1_req_client.json → 2_req_source.json → 3_req_openai.json → 4_req_target.json
|
||||
→ 5_res_provider.txt → 6_res_openai.txt → 7_res_client.txt
|
||||
```
|
||||
|
||||
Files are written to `<repo>/logs/<session>/` for each request session.
|
||||
|
||||
## Failure Modes and Resilience
|
||||
|
||||
## 1) Account/Provider Availability
|
||||
|
||||
- provider account cooldown on transient/rate/auth errors
|
||||
- account fallback before failing request
|
||||
- combo model fallback when current model/provider path is exhausted
|
||||
|
||||
## 2) Token Expiry
|
||||
|
||||
- pre-check and refresh with retry for refreshable providers
|
||||
- 401/403 retry after refresh attempt in core path
|
||||
|
||||
## 3) Stream Safety
|
||||
|
||||
- disconnect-aware stream controller
|
||||
- translation stream with end-of-stream flush and `[DONE]` handling
|
||||
- usage estimation fallback when provider usage metadata is missing
|
||||
|
||||
## 4) Cloud Sync Degradation
|
||||
|
||||
- sync errors are surfaced but local runtime continues
|
||||
- scheduler has retry-capable logic, but periodic execution currently calls single-attempt sync by default
|
||||
|
||||
## 5) Data Integrity
|
||||
|
||||
- SQLite schema migrations and auto-upgrade hooks at startup
|
||||
- legacy JSON → SQLite migration compatibility path
|
||||
|
||||
## Observability and Operational Signals
|
||||
|
||||
Runtime visibility sources:
|
||||
|
||||
- console logs from `src/sse/utils/logger.ts`
|
||||
- per-request usage aggregates in SQLite (`usage_history`, `call_logs`, `proxy_logs`)
|
||||
- textual request status log in `log.txt` (optional/compat)
|
||||
- optional deep request/translation logs under `logs/` when `ENABLE_REQUEST_LOGS=true`
|
||||
- dashboard usage endpoints (`/api/usage/*`) for UI consumption
|
||||
|
||||
## Security-Sensitive Boundaries
|
||||
|
||||
- JWT secret (`JWT_SECRET`) secures dashboard session cookie verification/signing
|
||||
- Initial password bootstrap (`INITIAL_PASSWORD`) should be explicitly configured for first-run provisioning
|
||||
- API key HMAC secret (`API_KEY_SECRET`) secures generated local API key format
|
||||
- Provider secrets (API keys/tokens) are persisted in local DB and should be protected at filesystem level
|
||||
- Cloud sync endpoints rely on API key auth + machine id semantics
|
||||
|
||||
## Environment and Runtime Matrix
|
||||
|
||||
Environment variables actively used by code:
|
||||
|
||||
- App/auth: `JWT_SECRET`, `INITIAL_PASSWORD`
|
||||
- Storage: `DATA_DIR`
|
||||
- Compatible node behavior: `ALLOW_MULTI_CONNECTIONS_PER_COMPAT_NODE`
|
||||
- Optional storage base override (Linux/macOS when `DATA_DIR` unset): `XDG_CONFIG_HOME`
|
||||
- Security hashing: `API_KEY_SECRET`, `MACHINE_ID_SALT`
|
||||
- Logging: `ENABLE_REQUEST_LOGS`
|
||||
- Sync/cloud URLing: `NEXT_PUBLIC_BASE_URL`, `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Outbound proxy: `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY` and lowercase variants
|
||||
- SOCKS5 feature flags: `ENABLE_SOCKS5_PROXY`, `NEXT_PUBLIC_ENABLE_SOCKS5_PROXY`
|
||||
- Platform/runtime helpers (not app-specific config): `APPDATA`, `NODE_ENV`, `PORT`, `HOSTNAME`
|
||||
|
||||
## Known Architectural Notes
|
||||
|
||||
1. `usageDb` and `localDb` share the same base directory policy (`DATA_DIR` -> `XDG_CONFIG_HOME/omniroute` -> `~/.omniroute`) with legacy file migration.
|
||||
2. `/api/v1/route.ts` delegates to the same unified catalog builder used by `/api/v1/models` (`src/app/api/v1/models/catalog.ts`) to avoid semantic drift.
|
||||
3. Request logger writes full headers/body when enabled; treat log directory as sensitive.
|
||||
4. Cloud behavior depends on correct `NEXT_PUBLIC_BASE_URL` and cloud endpoint reachability.
|
||||
5. The `open-sse/` directory is published as the `@omniroute/open-sse` **npm workspace package**. Source code imports it via `@omniroute/open-sse/...` (resolved by Next.js `transpilePackages`). File paths in this document still use the directory name `open-sse/` for consistency.
|
||||
6. Charts in the dashboard use **Recharts** (SVG-based) for accessible, interactive analytics visualizations (model usage bar charts, provider breakdown tables with success rates).
|
||||
7. E2E tests use **Playwright** (`tests/e2e/`), run via `npm run test:e2e`. Unit tests use **Node.js test runner** (`tests/unit/`), run via `npm run test:unit`. Source code under `src/` is **TypeScript** (`.ts`/`.tsx`); the `open-sse/` workspace remains JavaScript (`.js`).
|
||||
8. Settings page is organized into 5 tabs: Security, Routing (6 global strategies: fill-first, round-robin, p2c, random, least-used, cost-optimized), Resilience (editable rate limits, circuit breaker, policies), AI (thinking budget, system prompt, prompt cache), Advanced (proxy).
|
||||
|
||||
## Operational Verification Checklist
|
||||
|
||||
- Build from source: `npm run build`
|
||||
- Build Docker image: `docker build -t omniroute .`
|
||||
- Start service and verify:
|
||||
- `GET /api/settings`
|
||||
- `GET /api/v1/models`
|
||||
- CLI target base URL should be `http://<host>:20128/v1` when `PORT=20128`
|
||||
@@ -0,0 +1,67 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/AUTO-COMBO.md) · 🇪🇸 [es](../es/AUTO-COMBO.md) · 🇫🇷 [fr](../fr/AUTO-COMBO.md) · 🇩🇪 [de](../de/AUTO-COMBO.md) · 🇮🇹 [it](../it/AUTO-COMBO.md) · 🇷🇺 [ru](../ru/AUTO-COMBO.md) · 🇨🇳 [zh-CN](../zh-CN/AUTO-COMBO.md) · 🇯🇵 [ja](../ja/AUTO-COMBO.md) · 🇰🇷 [ko](../ko/AUTO-COMBO.md) · 🇸🇦 [ar](../ar/AUTO-COMBO.md) · 🇮🇳 [in](../in/AUTO-COMBO.md) · 🇹🇭 [th](../th/AUTO-COMBO.md) · 🇻🇳 [vi](../vi/AUTO-COMBO.md) · 🇮🇩 [id](../id/AUTO-COMBO.md) · 🇲🇾 [ms](../ms/AUTO-COMBO.md) · 🇳🇱 [nl](../nl/AUTO-COMBO.md) · 🇵🇱 [pl](../pl/AUTO-COMBO.md) · 🇸🇪 [sv](../sv/AUTO-COMBO.md) · 🇳🇴 [no](../no/AUTO-COMBO.md) · 🇩🇰 [da](../da/AUTO-COMBO.md) · 🇫🇮 [fi](../fi/AUTO-COMBO.md) · 🇵🇹 [pt](../pt/AUTO-COMBO.md) · 🇷🇴 [ro](../ro/AUTO-COMBO.md) · 🇭🇺 [hu](../hu/AUTO-COMBO.md) · 🇧🇬 [bg](../bg/AUTO-COMBO.md) · 🇸🇰 [sk](../sk/AUTO-COMBO.md) · 🇺🇦 [uk-UA](../uk-UA/AUTO-COMBO.md) · 🇮🇱 [he](../he/AUTO-COMBO.md) · 🇵🇭 [phi](../phi/AUTO-COMBO.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Auto-Combo Engine
|
||||
|
||||
> Self-managing model chains with adaptive scoring
|
||||
|
||||
## How It Works
|
||||
|
||||
The Auto-Combo Engine dynamically selects the best provider/model for each request using a **6-factor scoring function**:
|
||||
|
||||
| Factor | Weight | Description |
|
||||
| :--------- | :----- | :---------------------------------------------- |
|
||||
| Quota | 0.20 | Remaining capacity [0..1] |
|
||||
| Health | 0.25 | Circuit breaker: CLOSED=1.0, HALF=0.5, OPEN=0.0 |
|
||||
| CostInv | 0.20 | Inverse cost (cheaper = higher score) |
|
||||
| LatencyInv | 0.15 | Inverse p95 latency (faster = higher) |
|
||||
| TaskFit | 0.10 | Model × task type fitness score |
|
||||
| Stability | 0.10 | Low variance in latency/errors |
|
||||
|
||||
## Mode Packs
|
||||
|
||||
| Pack | Focus | Key Weight |
|
||||
| :---------------------- | :----------- | :--------------- |
|
||||
| 🚀 **Ship Fast** | Speed | latencyInv: 0.35 |
|
||||
| 💰 **Cost Saver** | Economy | costInv: 0.40 |
|
||||
| 🎯 **Quality First** | Best model | taskFit: 0.40 |
|
||||
| 📡 **Offline Friendly** | Availability | quota: 0.40 |
|
||||
|
||||
## Self-Healing
|
||||
|
||||
- **Temporary exclusion**: Score < 0.2 → excluded for 5 min (progressive backoff, max 30 min)
|
||||
- **Circuit breaker awareness**: OPEN → auto-excluded; HALF_OPEN → probe requests
|
||||
- **Incident mode**: >50% OPEN → disable exploration, maximize stability
|
||||
- **Cooldown recovery**: After exclusion, first request is a "probe" with reduced timeout
|
||||
|
||||
## Bandit Exploration
|
||||
|
||||
5% of requests (configurable) are routed to random providers for exploration. Disabled in incident mode.
|
||||
|
||||
## API
|
||||
|
||||
```bash
|
||||
# Create auto-combo
|
||||
curl -X POST http://localhost:20128/api/combos/auto \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"id":"my-auto","name":"Auto Coder","candidatePool":["anthropic","google","openai"],"modePack":"ship-fast"}'
|
||||
|
||||
# List auto-combos
|
||||
curl http://localhost:20128/api/combos/auto
|
||||
```
|
||||
|
||||
## Task Fitness
|
||||
|
||||
30+ models scored across 6 task types (`coding`, `review`, `planning`, `analysis`, `debugging`, `documentation`). Supports wildcard patterns (e.g., `*-coder` → high coding score).
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------ |
|
||||
| `open-sse/services/autoCombo/scoring.ts` | Scoring function & pool normalization |
|
||||
| `open-sse/services/autoCombo/taskFitness.ts` | Model × task fitness lookup |
|
||||
| `open-sse/services/autoCombo/engine.ts` | Selection logic, bandit, budget cap |
|
||||
| `open-sse/services/autoCombo/selfHealing.ts` | Exclusion, probes, incident mode |
|
||||
| `open-sse/services/autoCombo/modePacks.ts` | 4 weight profiles |
|
||||
| `src/app/api/combos/auto/route.ts` | REST API |
|
||||
@@ -0,0 +1,593 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/CODEBASE_DOCUMENTATION.md) · 🇪🇸 [es](../es/CODEBASE_DOCUMENTATION.md) · 🇫🇷 [fr](../fr/CODEBASE_DOCUMENTATION.md) · 🇩🇪 [de](../de/CODEBASE_DOCUMENTATION.md) · 🇮🇹 [it](../it/CODEBASE_DOCUMENTATION.md) · 🇷🇺 [ru](../ru/CODEBASE_DOCUMENTATION.md) · 🇨🇳 [zh-CN](../zh-CN/CODEBASE_DOCUMENTATION.md) · 🇯🇵 [ja](../ja/CODEBASE_DOCUMENTATION.md) · 🇰🇷 [ko](../ko/CODEBASE_DOCUMENTATION.md) · 🇸🇦 [ar](../ar/CODEBASE_DOCUMENTATION.md) · 🇮🇳 [in](../in/CODEBASE_DOCUMENTATION.md) · 🇹🇭 [th](../th/CODEBASE_DOCUMENTATION.md) · 🇻🇳 [vi](../vi/CODEBASE_DOCUMENTATION.md) · 🇮🇩 [id](../id/CODEBASE_DOCUMENTATION.md) · 🇲🇾 [ms](../ms/CODEBASE_DOCUMENTATION.md) · 🇳🇱 [nl](../nl/CODEBASE_DOCUMENTATION.md) · 🇵🇱 [pl](../pl/CODEBASE_DOCUMENTATION.md) · 🇸🇪 [sv](../sv/CODEBASE_DOCUMENTATION.md) · 🇳🇴 [no](../no/CODEBASE_DOCUMENTATION.md) · 🇩🇰 [da](../da/CODEBASE_DOCUMENTATION.md) · 🇫🇮 [fi](../fi/CODEBASE_DOCUMENTATION.md) · 🇵🇹 [pt](../pt/CODEBASE_DOCUMENTATION.md) · 🇷🇴 [ro](../ro/CODEBASE_DOCUMENTATION.md) · 🇭🇺 [hu](../hu/CODEBASE_DOCUMENTATION.md) · 🇧🇬 [bg](../bg/CODEBASE_DOCUMENTATION.md) · 🇸🇰 [sk](../sk/CODEBASE_DOCUMENTATION.md) · 🇺🇦 [uk-UA](../uk-UA/CODEBASE_DOCUMENTATION.md) · 🇮🇱 [he](../he/CODEBASE_DOCUMENTATION.md) · 🇵🇭 [phi](../phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
---
|
||||
|
||||
# omniroute — Codebase Documentation
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](CODEBASE_DOCUMENTATION.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/CODEBASE_DOCUMENTATION.md) | 🇪🇸 [Español](i18n/es/CODEBASE_DOCUMENTATION.md) | 🇫🇷 [Français](i18n/fr/CODEBASE_DOCUMENTATION.md) | 🇮🇹 [Italiano](i18n/it/CODEBASE_DOCUMENTATION.md) | 🇷🇺 [Русский](i18n/ru/CODEBASE_DOCUMENTATION.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/CODEBASE_DOCUMENTATION.md) | 🇩🇪 [Deutsch](i18n/de/CODEBASE_DOCUMENTATION.md) | 🇮🇳 [हिन्दी](i18n/in/CODEBASE_DOCUMENTATION.md) | 🇹🇭 [ไทย](i18n/th/CODEBASE_DOCUMENTATION.md) | 🇺🇦 [Українська](i18n/uk-UA/CODEBASE_DOCUMENTATION.md) | 🇸🇦 [العربية](i18n/ar/CODEBASE_DOCUMENTATION.md) | 🇯🇵 [日本語](i18n/ja/CODEBASE_DOCUMENTATION.md) | 🇻🇳 [Tiếng Việt](i18n/vi/CODEBASE_DOCUMENTATION.md) | 🇧🇬 [Български](i18n/bg/CODEBASE_DOCUMENTATION.md) | 🇩🇰 [Dansk](i18n/da/CODEBASE_DOCUMENTATION.md) | 🇫🇮 [Suomi](i18n/fi/CODEBASE_DOCUMENTATION.md) | 🇮🇱 [עברית](i18n/he/CODEBASE_DOCUMENTATION.md) | 🇭🇺 [Magyar](i18n/hu/CODEBASE_DOCUMENTATION.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/CODEBASE_DOCUMENTATION.md) | 🇰🇷 [한국어](i18n/ko/CODEBASE_DOCUMENTATION.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/CODEBASE_DOCUMENTATION.md) | 🇳🇱 [Nederlands](i18n/nl/CODEBASE_DOCUMENTATION.md) | 🇳🇴 [Norsk](i18n/no/CODEBASE_DOCUMENTATION.md) | 🇵🇹 [Português (Portugal)](i18n/pt/CODEBASE_DOCUMENTATION.md) | 🇷🇴 [Română](i18n/ro/CODEBASE_DOCUMENTATION.md) | 🇵🇱 [Polski](i18n/pl/CODEBASE_DOCUMENTATION.md) | 🇸🇰 [Slovenčina](i18n/sk/CODEBASE_DOCUMENTATION.md) | 🇸🇪 [Svenska](i18n/sv/CODEBASE_DOCUMENTATION.md) | 🇵🇭 [Filipino](i18n/phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
> A comprehensive, beginner-friendly guide to the **omniroute** multi-provider AI proxy router.
|
||||
|
||||
---
|
||||
|
||||
## 1. What Is omniroute?
|
||||
|
||||
omniroute is a **proxy router** that sits between AI clients (Claude CLI, Codex, Cursor IDE, etc.) and AI providers (Anthropic, Google, OpenAI, AWS, GitHub, etc.). It solves one big problem:
|
||||
|
||||
> **Different AI clients speak different "languages" (API formats), and different AI providers expect different "languages" too.** omniroute translates between them automatically.
|
||||
|
||||
Think of it like a universal translator at the United Nations — any delegate can speak any language, and the translator converts it for any other delegate.
|
||||
|
||||
---
|
||||
|
||||
## 2. Architecture Overview
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph Clients
|
||||
A[Claude CLI]
|
||||
B[Codex]
|
||||
C[Cursor IDE]
|
||||
D[OpenAI-compatible]
|
||||
end
|
||||
|
||||
subgraph omniroute
|
||||
E[Handler Layer]
|
||||
F[Translator Layer]
|
||||
G[Executor Layer]
|
||||
H[Services Layer]
|
||||
end
|
||||
|
||||
subgraph Providers
|
||||
I[Anthropic Claude]
|
||||
J[Google Gemini]
|
||||
K[OpenAI / Codex]
|
||||
L[GitHub Copilot]
|
||||
M[AWS Kiro]
|
||||
N[Antigravity]
|
||||
O[Cursor API]
|
||||
end
|
||||
|
||||
A --> E
|
||||
B --> E
|
||||
C --> E
|
||||
D --> E
|
||||
E --> F
|
||||
F --> G
|
||||
G --> I
|
||||
G --> J
|
||||
G --> K
|
||||
G --> L
|
||||
G --> M
|
||||
G --> N
|
||||
G --> O
|
||||
H -.-> E
|
||||
H -.-> G
|
||||
```
|
||||
|
||||
### Core Principle: Hub-and-Spoke Translation
|
||||
|
||||
All format translation passes through **OpenAI format as the hub**:
|
||||
|
||||
```
|
||||
Client Format → [OpenAI Hub] → Provider Format (request)
|
||||
Provider Format → [OpenAI Hub] → Client Format (response)
|
||||
```
|
||||
|
||||
This means you only need **N translators** (one per format) instead of **N²** (every pair).
|
||||
|
||||
---
|
||||
|
||||
## 3. Project Structure
|
||||
|
||||
```
|
||||
omniroute/
|
||||
├── open-sse/ ← Core proxy library (portable, framework-agnostic)
|
||||
│ ├── index.js ← Main entry point, exports everything
|
||||
│ ├── config/ ← Configuration & constants
|
||||
│ ├── executors/ ← Provider-specific request execution
|
||||
│ ├── handlers/ ← Request handling orchestration
|
||||
│ ├── services/ ← Business logic (auth, models, fallback, usage)
|
||||
│ ├── translator/ ← Format translation engine
|
||||
│ │ ├── request/ ← Request translators (8 files)
|
||||
│ │ ├── response/ ← Response translators (7 files)
|
||||
│ │ └── helpers/ ← Shared translation utilities (6 files)
|
||||
│ └── utils/ ← Utility functions
|
||||
├── src/ ← Application layer (Express/Worker runtime)
|
||||
│ ├── app/ ← Web UI, API routes, middleware
|
||||
│ ├── lib/ ← Database, auth, and shared library code
|
||||
│ ├── mitm/ ← Man-in-the-middle proxy utilities
|
||||
│ ├── models/ ← Database models
|
||||
│ ├── shared/ ← Shared utilities (wrappers around open-sse)
|
||||
│ ├── sse/ ← SSE endpoint handlers
|
||||
│ └── store/ ← State management
|
||||
├── data/ ← Runtime data (credentials, logs)
|
||||
│ └── provider-credentials.json (external credentials override, gitignored)
|
||||
└── tester/ ← Test utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Module-by-Module Breakdown
|
||||
|
||||
### 4.1 Config (`open-sse/config/`)
|
||||
|
||||
The **single source of truth** for all provider configuration.
|
||||
|
||||
| File | Purpose |
|
||||
| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `constants.ts` | `PROVIDERS` object with base URLs, OAuth credentials (defaults), headers, and default system prompts for every provider. Also defines `HTTP_STATUS`, `ERROR_TYPES`, `COOLDOWN_MS`, `BACKOFF_CONFIG`, and `SKIP_PATTERNS`. |
|
||||
| `credentialLoader.ts` | Loads external credentials from `data/provider-credentials.json` and merges them over the hardcoded defaults in `PROVIDERS`. Keeps secrets out of source control while maintaining backwards compatibility. |
|
||||
| `providerModels.ts` | Central model registry: maps provider aliases → model IDs. Functions like `getModels()`, `getProviderByAlias()`. |
|
||||
| `codexInstructions.ts` | System instructions injected into Codex requests (editing constraints, sandbox rules, approval policies). |
|
||||
| `defaultThinkingSignature.ts` | Default "thinking" signatures for Claude and Gemini models. |
|
||||
| `ollamaModels.ts` | Schema definition for local Ollama models (name, size, family, quantization). |
|
||||
|
||||
#### Credential Loading Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["App starts"] --> B["constants.ts defines PROVIDERS\nwith hardcoded defaults"]
|
||||
B --> C{"data/provider-credentials.json\nexists?"}
|
||||
C -->|Yes| D["credentialLoader reads JSON"]
|
||||
C -->|No| E["Use hardcoded defaults"]
|
||||
D --> F{"For each provider in JSON"}
|
||||
F --> G{"Provider exists\nin PROVIDERS?"}
|
||||
G -->|No| H["Log warning, skip"]
|
||||
G -->|Yes| I{"Value is object?"}
|
||||
I -->|No| J["Log warning, skip"]
|
||||
I -->|Yes| K["Merge clientId, clientSecret,\ntokenUrl, authUrl, refreshUrl"]
|
||||
K --> F
|
||||
H --> F
|
||||
J --> F
|
||||
F -->|Done| L["PROVIDERS ready with\nmerged credentials"]
|
||||
E --> L
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 Executors (`open-sse/executors/`)
|
||||
|
||||
Executors encapsulate **provider-specific logic** using the **Strategy Pattern**. Each executor overrides base methods as needed.
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BaseExecutor {
|
||||
+buildUrl(model, stream, options)
|
||||
+buildHeaders(credentials, stream, body)
|
||||
+transformRequest(body, model, stream, credentials)
|
||||
+execute(url, options)
|
||||
+shouldRetry(status, error)
|
||||
+refreshCredentials(credentials, log)
|
||||
}
|
||||
|
||||
class DefaultExecutor {
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class AntigravityExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+shouldRetry()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class CursorExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseResponse()
|
||||
+generateChecksum()
|
||||
}
|
||||
|
||||
class KiroExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseEventStream()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
BaseExecutor <|-- DefaultExecutor
|
||||
BaseExecutor <|-- AntigravityExecutor
|
||||
BaseExecutor <|-- CursorExecutor
|
||||
BaseExecutor <|-- KiroExecutor
|
||||
BaseExecutor <|-- CodexExecutor
|
||||
BaseExecutor <|-- GeminiCLIExecutor
|
||||
BaseExecutor <|-- GithubExecutor
|
||||
```
|
||||
|
||||
| Executor | Provider | Key Specializations |
|
||||
| ---------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base.ts` | — | Abstract base: URL building, headers, retry logic, credential refresh |
|
||||
| `default.ts` | Claude, Gemini, OpenAI, GLM, Kimi, MiniMax | Generic OAuth token refresh for standard providers |
|
||||
| `antigravity.ts` | Google Cloud Code | Project/session ID generation, multi-URL fallback, custom retry parsing from error messages ("reset after 2h7m23s") |
|
||||
| `cursor.ts` | Cursor IDE | **Most complex**: SHA-256 checksum auth, Protobuf request encoding, binary EventStream → SSE response parsing |
|
||||
| `codex.ts` | OpenAI Codex | Injects system instructions, manages thinking levels, removes unsupported parameters |
|
||||
| `gemini-cli.ts` | Google Gemini CLI | Custom URL building (`streamGenerateContent`), Google OAuth token refresh |
|
||||
| `github.ts` | GitHub Copilot | Dual token system (GitHub OAuth + Copilot token), VSCode header mimicking |
|
||||
| `kiro.ts` | AWS CodeWhisperer | AWS EventStream binary parsing, AMZN event frames, token estimation |
|
||||
| `index.ts` | — | Factory: maps provider name → executor class, with default fallback |
|
||||
|
||||
---
|
||||
|
||||
### 4.3 Handlers (`open-sse/handlers/`)
|
||||
|
||||
The **orchestration layer** — coordinates translation, execution, streaming, and error handling.
|
||||
|
||||
| File | Purpose |
|
||||
| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `chatCore.ts` | **Central orchestrator** (~600 lines). Handles the complete request lifecycle: format detection → translation → executor dispatch → streaming/non-streaming response → token refresh → error handling → usage logging. |
|
||||
| `responsesHandler.ts` | Adapter for OpenAI's Responses API: converts Responses format → Chat Completions → sends to `chatCore` → converts SSE back to Responses format. |
|
||||
| `embeddings.ts` | Embedding generation handler: resolves embedding model → provider, dispatches to provider API, returns OpenAI-compatible embedding response. Supports 6+ providers. |
|
||||
| `imageGeneration.ts` | Image generation handler: resolves image model → provider, supports OpenAI-compatible, Gemini-image (Antigravity), and fallback (Nebius) modes. Returns base64 or URL images. |
|
||||
|
||||
#### Request Lifecycle (chatCore.ts)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant chatCore
|
||||
participant Translator
|
||||
participant Executor
|
||||
participant Provider
|
||||
|
||||
Client->>chatCore: Request (any format)
|
||||
chatCore->>chatCore: Detect source format
|
||||
chatCore->>chatCore: Check bypass patterns
|
||||
chatCore->>chatCore: Resolve model & provider
|
||||
chatCore->>Translator: Translate request (source → OpenAI → target)
|
||||
chatCore->>Executor: Get executor for provider
|
||||
Executor->>Executor: Build URL, headers, transform request
|
||||
Executor->>Executor: Refresh credentials if needed
|
||||
Executor->>Provider: HTTP fetch (streaming or non-streaming)
|
||||
|
||||
alt Streaming
|
||||
Provider-->>chatCore: SSE stream
|
||||
chatCore->>chatCore: Pipe through SSE transform stream
|
||||
Note over chatCore: Transform stream translates<br/>each chunk: target → OpenAI → source
|
||||
chatCore-->>Client: Translated SSE stream
|
||||
else Non-streaming
|
||||
Provider-->>chatCore: JSON response
|
||||
chatCore->>Translator: Translate response
|
||||
chatCore-->>Client: Translated JSON
|
||||
end
|
||||
|
||||
alt Error (401, 429, 500...)
|
||||
chatCore->>Executor: Retry with credential refresh
|
||||
chatCore->>chatCore: Account fallback logic
|
||||
end
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.4 Services (`open-sse/services/`)
|
||||
|
||||
Business logic that supports the handlers and executors.
|
||||
|
||||
| File | Purpose |
|
||||
| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `provider.ts` | **Format detection** (`detectFormat`): analyzes request body structure to identify Claude/OpenAI/Gemini/Antigravity/Responses formats (includes `max_tokens` heuristic for Claude). Also: URL building, header building, thinking config normalization. Supports `openai-compatible-*` and `anthropic-compatible-*` dynamic providers. |
|
||||
| `model.ts` | Model string parsing (`claude/model-name` → `{provider: "claude", model: "model-name"}`), alias resolution with collision detection, input sanitization (rejects path traversal/control chars), and model info resolution with async alias getter support. |
|
||||
| `accountFallback.ts` | Rate-limit handling: exponential backoff (1s → 2s → 4s → max 2min), account cooldown management, error classification (which errors trigger fallback vs. not). |
|
||||
| `tokenRefresh.ts` | OAuth token refresh for **every provider**: Google (Gemini, Antigravity), Claude, Codex, Qwen, iFlow, GitHub (OAuth + Copilot dual-token), Kiro (AWS SSO OIDC + Social Auth). Includes in-flight promise deduplication cache and retry with exponential backoff. |
|
||||
| `combo.ts` | **Combo models**: chains of fallback models. If model A fails with a fallback-eligible error, try model B, then C, etc. Returns actual upstream status codes. |
|
||||
| `usage.ts` | Fetches quota/usage data from provider APIs (GitHub Copilot quotas, Antigravity model quotas, Codex rate limits, Kiro usage breakdowns, Claude settings). |
|
||||
| `accountSelector.ts` | Smart account selection with scoring algorithm: considers priority, health status, round-robin position, and cooldown state to pick the optimal account for each request. |
|
||||
| `contextManager.ts` | Request context lifecycle management: creates and tracks per-request context objects with metadata (request ID, timestamps, provider info) for debugging and logging. |
|
||||
| `ipFilter.ts` | IP-based access control: supports allowlist and blocklist modes. Validates client IP against configured rules before processing API requests. |
|
||||
| `sessionManager.ts` | Session tracking with client fingerprinting: tracks active sessions using hashed client identifiers, monitors request counts, and provides session metrics. |
|
||||
| `signatureCache.ts` | Request signature-based deduplication cache: prevents duplicate requests by caching recent request signatures and returning cached responses for identical requests within a time window. |
|
||||
| `systemPrompt.ts` | Global system prompt injection: prepends or appends a configurable system prompt to all requests, with per-provider compatibility handling. |
|
||||
| `thinkingBudget.ts` | Reasoning token budget management: supports passthrough, auto (strip thinking config), custom (fixed budget), and adaptive (complexity-scaled) modes for controlling thinking/reasoning tokens. |
|
||||
| `wildcardRouter.ts` | Wildcard model pattern routing: resolves wildcard patterns (e.g., `*/claude-*`) to concrete provider/model pairs based on availability and priority. |
|
||||
|
||||
#### Token Refresh Deduplication
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant R1 as Request 1
|
||||
participant R2 as Request 2
|
||||
participant Cache as refreshPromiseCache
|
||||
participant OAuth as OAuth Provider
|
||||
|
||||
R1->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: No in-flight promise
|
||||
Cache->>OAuth: Start refresh
|
||||
R2->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: Found in-flight promise
|
||||
Cache-->>R2: Return existing promise
|
||||
OAuth-->>Cache: New access token
|
||||
Cache-->>R1: New access token
|
||||
Cache-->>R2: Same access token (shared)
|
||||
Cache->>Cache: Delete cache entry
|
||||
```
|
||||
|
||||
#### Account Fallback State Machine
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Active
|
||||
Active --> Error: Request fails (401/429/500)
|
||||
Error --> Cooldown: Apply backoff
|
||||
Cooldown --> Active: Cooldown expires
|
||||
Active --> Active: Request succeeds (reset backoff)
|
||||
|
||||
state Error {
|
||||
[*] --> ClassifyError
|
||||
ClassifyError --> ShouldFallback: Rate limit / Auth / Transient
|
||||
ClassifyError --> NoFallback: 400 Bad Request
|
||||
}
|
||||
|
||||
state Cooldown {
|
||||
[*] --> ExponentialBackoff
|
||||
ExponentialBackoff: Level 0 = 1s
|
||||
ExponentialBackoff: Level 1 = 2s
|
||||
ExponentialBackoff: Level 2 = 4s
|
||||
ExponentialBackoff: Max = 2min
|
||||
}
|
||||
```
|
||||
|
||||
#### Combo Model Chain
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Request with\ncombo model"] --> B["Model A"]
|
||||
B -->|"2xx Success"| C["Return response"]
|
||||
B -->|"429/401/500"| D{"Fallback\neligible?"}
|
||||
D -->|Yes| E["Model B"]
|
||||
D -->|No| F["Return error"]
|
||||
E -->|"2xx Success"| C
|
||||
E -->|"429/401/500"| G{"Fallback\neligible?"}
|
||||
G -->|Yes| H["Model C"]
|
||||
G -->|No| F
|
||||
H -->|"2xx Success"| C
|
||||
H -->|"Fail"| I["All failed →\nReturn last status"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.5 Translator (`open-sse/translator/`)
|
||||
|
||||
The **format translation engine** using a self-registering plugin system.
|
||||
|
||||
#### Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "Request Translation"
|
||||
A["Claude → OpenAI"]
|
||||
B["Gemini → OpenAI"]
|
||||
C["Antigravity → OpenAI"]
|
||||
D["OpenAI Responses → OpenAI"]
|
||||
E["OpenAI → Claude"]
|
||||
F["OpenAI → Gemini"]
|
||||
G["OpenAI → Kiro"]
|
||||
H["OpenAI → Cursor"]
|
||||
end
|
||||
|
||||
subgraph "Response Translation"
|
||||
I["Claude → OpenAI"]
|
||||
J["Gemini → OpenAI"]
|
||||
K["Kiro → OpenAI"]
|
||||
L["Cursor → OpenAI"]
|
||||
M["OpenAI → Claude"]
|
||||
N["OpenAI → Antigravity"]
|
||||
O["OpenAI → Responses"]
|
||||
end
|
||||
```
|
||||
|
||||
| Directory | Files | Description |
|
||||
| ------------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `request/` | 8 translators | Convert request bodies between formats. Each file self-registers via `register(from, to, fn)` on import. |
|
||||
| `response/` | 7 translators | Convert streaming response chunks between formats. Handles SSE event types, thinking blocks, tool calls. |
|
||||
| `helpers/` | 6 helpers | Shared utilities: `claudeHelper` (system prompt extraction, thinking config), `geminiHelper` (parts/contents mapping), `openaiHelper` (format filtering), `toolCallHelper` (ID generation, missing response injection), `maxTokensHelper`, `responsesApiHelper`. |
|
||||
| `index.ts` | — | Translation engine: `translateRequest()`, `translateResponse()`, state management, registry. |
|
||||
| `formats.ts` | — | Format constants: `OPENAI`, `CLAUDE`, `GEMINI`, `ANTIGRAVITY`, `KIRO`, `CURSOR`, `OPENAI_RESPONSES`. |
|
||||
|
||||
#### Key Design: Self-Registering Plugins
|
||||
|
||||
```javascript
|
||||
// Each translator file calls register() on import:
|
||||
import { register } from "../index.js";
|
||||
register("claude", "openai", translateClaudeToOpenAI);
|
||||
|
||||
// The index.js imports all translator files, triggering registration:
|
||||
import "./request/claude-to-openai.js"; // ← self-registers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.6 Utils (`open-sse/utils/`)
|
||||
|
||||
| File | Purpose |
|
||||
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `error.ts` | Error response building (OpenAI-compatible format), upstream error parsing, Antigravity retry-time extraction from error messages, SSE error streaming. |
|
||||
| `stream.ts` | **SSE Transform Stream** — the core streaming pipeline. Two modes: `TRANSLATE` (full format translation) and `PASSTHROUGH` (normalize + extract usage). Handles chunk buffering, usage estimation, content length tracking. Per-stream encoder/decoder instances avoid shared state. |
|
||||
| `streamHelpers.ts` | Low-level SSE utilities: `parseSSELine` (whitespace-tolerant), `hasValuableContent` (filters empty chunks for OpenAI/Claude/Gemini), `fixInvalidId`, `formatSSE` (format-aware SSE serialization with `perf_metrics` cleanup). |
|
||||
| `usageTracking.ts` | Token usage extraction from any format (Claude/OpenAI/Gemini/Responses), estimation with separate tool/message char-per-token ratios, buffer addition (2000 tokens safety margin), format-specific field filtering, console logging with ANSI colors. |
|
||||
| `requestLogger.ts` | File-based request logging (opt-in via `ENABLE_REQUEST_LOGS=true`). Creates session folders with numbered files: `1_req_client.json` → `7_res_client.txt`. All I/O is async (fire-and-forget). Masks sensitive headers. |
|
||||
| `bypassHandler.ts` | Intercepts specific patterns from Claude CLI (title extraction, warmup, count) and returns fake responses without calling any provider. Supports both streaming and non-streaming. Intentionally limited to Claude CLI scope. |
|
||||
| `networkProxy.ts` | Resolves outbound proxy URL for a given provider with precedence: provider-specific config → global config → environment variables (`HTTPS_PROXY`/`HTTP_PROXY`/`ALL_PROXY`). Supports `NO_PROXY` exclusions. Caches config for 30s. |
|
||||
|
||||
#### SSE Streaming Pipeline
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["Provider SSE stream"] --> B["TextDecoder\n(per-stream instance)"]
|
||||
B --> C["Buffer lines\n(split on newline)"]
|
||||
C --> D["parseSSELine()\n(trim whitespace, parse JSON)"]
|
||||
D --> E{"Mode?"}
|
||||
E -->|TRANSLATE| F["translateResponse()\ntarget → OpenAI → source"]
|
||||
E -->|PASSTHROUGH| G["fixInvalidId()\nnormalize chunk"]
|
||||
F --> H["hasValuableContent()\nfilter empty chunks"]
|
||||
G --> H
|
||||
H -->|"Has content"| I["extractUsage()\ntrack token counts"]
|
||||
H -->|"Empty"| J["Skip chunk"]
|
||||
I --> K["formatSSE()\nserialize + clean perf_metrics"]
|
||||
K --> L["TextEncoder\n(per-stream instance)"]
|
||||
L --> M["Enqueue to\nclient stream"]
|
||||
|
||||
style A fill:#f9f,stroke:#333
|
||||
style M fill:#9f9,stroke:#333
|
||||
```
|
||||
|
||||
#### Request Logger Session Structure
|
||||
|
||||
```
|
||||
logs/
|
||||
└── claude_gemini_claude-sonnet_20260208_143045/
|
||||
├── 1_req_client.json ← Raw client request
|
||||
├── 2_req_source.json ← After initial conversion
|
||||
├── 3_req_openai.json ← OpenAI intermediate format
|
||||
├── 4_req_target.json ← Final target format
|
||||
├── 5_res_provider.txt ← Provider SSE chunks (streaming)
|
||||
├── 5_res_provider.json ← Provider response (non-streaming)
|
||||
├── 6_res_openai.txt ← OpenAI intermediate chunks
|
||||
├── 7_res_client.txt ← Client-facing SSE chunks
|
||||
└── 6_error.json ← Error details (if any)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.7 Application Layer (`src/`)
|
||||
|
||||
| Directory | Purpose |
|
||||
| ------------- | ---------------------------------------------------------------------- |
|
||||
| `src/app/` | Web UI, API routes, Express middleware, OAuth callback handlers |
|
||||
| `src/lib/` | Database access (`localDb.ts`, `usageDb.ts`), authentication, shared |
|
||||
| `src/mitm/` | Man-in-the-middle proxy utilities for intercepting provider traffic |
|
||||
| `src/models/` | Database model definitions |
|
||||
| `src/shared/` | Wrappers around open-sse functions (provider, stream, error, etc.) |
|
||||
| `src/sse/` | SSE endpoint handlers that wire the open-sse library to Express routes |
|
||||
| `src/store/` | Application state management |
|
||||
|
||||
#### Notable API Routes
|
||||
|
||||
| Route | Methods | Purpose |
|
||||
| --------------------------------------------- | --------------- | ------------------------------------------------------------------------------------- |
|
||||
| `/api/provider-models` | GET/POST/DELETE | CRUD for custom models per provider |
|
||||
| `/api/models/catalog` | GET | Aggregated catalog of all models (chat, embedding, image, custom) grouped by provider |
|
||||
| `/api/settings/proxy` | GET/PUT/DELETE | Hierarchical outbound proxy configuration (`global/providers/combos/keys`) |
|
||||
| `/api/settings/proxy/test` | POST | Validates proxy connectivity and returns public IP/latency |
|
||||
| `/v1/providers/[provider]/chat/completions` | POST | Dedicated per-provider chat completions with model validation |
|
||||
| `/v1/providers/[provider]/embeddings` | POST | Dedicated per-provider embeddings with model validation |
|
||||
| `/v1/providers/[provider]/images/generations` | POST | Dedicated per-provider image generation with model validation |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist management |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget configuration (passthrough/auto/custom/adaptive) |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt injection for all requests |
|
||||
| `/api/sessions` | GET | Active session tracking and metrics |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
|
||||
---
|
||||
|
||||
## 5. Key Design Patterns
|
||||
|
||||
### 5.1 Hub-and-Spoke Translation
|
||||
|
||||
All formats translate through **OpenAI format as the hub**. Adding a new provider only requires writing **one pair** of translators (to/from OpenAI), not N pairs.
|
||||
|
||||
### 5.2 Executor Strategy Pattern
|
||||
|
||||
Each provider has a dedicated executor class inheriting from `BaseExecutor`. The factory in `executors/index.ts` selects the right one at runtime.
|
||||
|
||||
### 5.3 Self-Registering Plugin System
|
||||
|
||||
Translator modules register themselves on import via `register()`. Adding a new translator is just creating a file and importing it.
|
||||
|
||||
### 5.4 Account Fallback with Exponential Backoff
|
||||
|
||||
When a provider returns 429/401/500, the system can switch to the next account, applying exponential cooldowns (1s → 2s → 4s → max 2min).
|
||||
|
||||
### 5.5 Combo Model Chains
|
||||
|
||||
A "combo" groups multiple `provider/model` strings. If the first fails, fallback to the next automatically.
|
||||
|
||||
### 5.6 Stateful Streaming Translation
|
||||
|
||||
Response translation maintains state across SSE chunks (thinking block tracking, tool call accumulation, content block indexing) via the `initState()` mechanism.
|
||||
|
||||
### 5.7 Usage Safety Buffer
|
||||
|
||||
A 2000-token buffer is added to reported usage to prevent clients from hitting context window limits due to overhead from system prompts and format translation.
|
||||
|
||||
---
|
||||
|
||||
## 6. Supported Formats
|
||||
|
||||
| Format | Direction | Identifier |
|
||||
| ----------------------- | --------------- | ------------------ |
|
||||
| OpenAI Chat Completions | source + target | `openai` |
|
||||
| OpenAI Responses API | source + target | `openai-responses` |
|
||||
| Anthropic Claude | source + target | `claude` |
|
||||
| Google Gemini | source + target | `gemini` |
|
||||
| Google Gemini CLI | target only | `gemini-cli` |
|
||||
| Antigravity | source + target | `antigravity` |
|
||||
| AWS Kiro | target only | `kiro` |
|
||||
| Cursor | target only | `cursor` |
|
||||
|
||||
---
|
||||
|
||||
## 7. Supported Providers
|
||||
|
||||
| Provider | Auth Method | Executor | Key Notes |
|
||||
| ------------------------ | ---------------------- | ----------- | --------------------------------------------- |
|
||||
| Anthropic Claude | API key or OAuth | Default | Uses `x-api-key` header |
|
||||
| Google Gemini | API key or OAuth | Default | Uses `x-goog-api-key` header |
|
||||
| Google Gemini CLI | OAuth | GeminiCLI | Uses `streamGenerateContent` endpoint |
|
||||
| Antigravity | OAuth | Antigravity | Multi-URL fallback, custom retry parsing |
|
||||
| OpenAI | API key | Default | Standard Bearer auth |
|
||||
| Codex | OAuth | Codex | Injects system instructions, manages thinking |
|
||||
| GitHub Copilot | OAuth + Copilot token | Github | Dual token, VSCode header mimicking |
|
||||
| Kiro (AWS) | AWS SSO OIDC or Social | Kiro | Binary EventStream parsing |
|
||||
| Cursor IDE | Checksum auth | Cursor | Protobuf encoding, SHA-256 checksums |
|
||||
| Qwen | OAuth | Default | Standard auth |
|
||||
| iFlow | OAuth (Basic + Bearer) | Default | Dual auth header |
|
||||
| OpenRouter | API key | Default | Standard Bearer auth |
|
||||
| GLM, Kimi, MiniMax | API key | Default | Claude-compatible, use `x-api-key` |
|
||||
| `openai-compatible-*` | API key | Default | Dynamic: any OpenAI-compatible endpoint |
|
||||
| `anthropic-compatible-*` | API key | Default | Dynamic: any Claude-compatible endpoint |
|
||||
|
||||
---
|
||||
|
||||
## 8. Data Flow Summary
|
||||
|
||||
### Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor\nbuildUrl + buildHeaders"]
|
||||
D --> E["fetch(providerURL)"]
|
||||
E --> F["createSSEStream()\nTRANSLATE mode"]
|
||||
F --> G["parseSSELine()"]
|
||||
G --> H["translateResponse()\ntarget → OpenAI → source"]
|
||||
H --> I["extractUsage()\n+ addBuffer"]
|
||||
I --> J["formatSSE()"]
|
||||
J --> K["Client receives\ntranslated SSE"]
|
||||
K --> L["logUsage()\nsaveRequestUsage()"]
|
||||
```
|
||||
|
||||
### Non-Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor.execute()"]
|
||||
D --> E["translateResponse()\ntarget → OpenAI → source"]
|
||||
E --> F["Return JSON\nresponse"]
|
||||
```
|
||||
|
||||
### Bypass Flow (Claude CLI)
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Claude CLI request"] --> B{"Match bypass\npattern?"}
|
||||
B -->|"Title/Warmup/Count"| C["Generate fake\nOpenAI response"]
|
||||
B -->|"No match"| D["Normal flow"]
|
||||
C --> E["Translate to\nsource format"]
|
||||
E --> F["Return without\ncalling provider"]
|
||||
```
|
||||
@@ -0,0 +1,148 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/FEATURES.md) · 🇪🇸 [es](../es/FEATURES.md) · 🇫🇷 [fr](../fr/FEATURES.md) · 🇩🇪 [de](../de/FEATURES.md) · 🇮🇹 [it](../it/FEATURES.md) · 🇷🇺 [ru](../ru/FEATURES.md) · 🇨🇳 [zh-CN](../zh-CN/FEATURES.md) · 🇯🇵 [ja](../ja/FEATURES.md) · 🇰🇷 [ko](../ko/FEATURES.md) · 🇸🇦 [ar](../ar/FEATURES.md) · 🇮🇳 [in](../in/FEATURES.md) · 🇹🇭 [th](../th/FEATURES.md) · 🇻🇳 [vi](../vi/FEATURES.md) · 🇮🇩 [id](../id/FEATURES.md) · 🇲🇾 [ms](../ms/FEATURES.md) · 🇳🇱 [nl](../nl/FEATURES.md) · 🇵🇱 [pl](../pl/FEATURES.md) · 🇸🇪 [sv](../sv/FEATURES.md) · 🇳🇴 [no](../no/FEATURES.md) · 🇩🇰 [da](../da/FEATURES.md) · 🇫🇮 [fi](../fi/FEATURES.md) · 🇵🇹 [pt](../pt/FEATURES.md) · 🇷🇴 [ro](../ro/FEATURES.md) · 🇭🇺 [hu](../hu/FEATURES.md) · 🇧🇬 [bg](../bg/FEATURES.md) · 🇸🇰 [sk](../sk/FEATURES.md) · 🇺🇦 [uk-UA](../uk-UA/FEATURES.md) · 🇮🇱 [he](../he/FEATURES.md) · 🇵🇭 [phi](../phi/FEATURES.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Dashboard Features Gallery
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](FEATURES.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/FEATURES.md) | 🇪🇸 [Español](i18n/es/FEATURES.md) | 🇫🇷 [Français](i18n/fr/FEATURES.md) | 🇮🇹 [Italiano](i18n/it/FEATURES.md) | 🇷🇺 [Русский](i18n/ru/FEATURES.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/FEATURES.md) | 🇩🇪 [Deutsch](i18n/de/FEATURES.md) | 🇮🇳 [हिन्दी](i18n/in/FEATURES.md) | 🇹🇭 [ไทย](i18n/th/FEATURES.md) | 🇺🇦 [Українська](i18n/uk-UA/FEATURES.md) | 🇸🇦 [العربية](i18n/ar/FEATURES.md) | 🇯🇵 [日本語](i18n/ja/FEATURES.md) | 🇻🇳 [Tiếng Việt](i18n/vi/FEATURES.md) | 🇧🇬 [Български](i18n/bg/FEATURES.md) | 🇩🇰 [Dansk](i18n/da/FEATURES.md) | 🇫🇮 [Suomi](i18n/fi/FEATURES.md) | 🇮🇱 [עברית](i18n/he/FEATURES.md) | 🇭🇺 [Magyar](i18n/hu/FEATURES.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/FEATURES.md) | 🇰🇷 [한국어](i18n/ko/FEATURES.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/FEATURES.md) | 🇳🇱 [Nederlands](i18n/nl/FEATURES.md) | 🇳🇴 [Norsk](i18n/no/FEATURES.md) | 🇵🇹 [Português (Portugal)](i18n/pt/FEATURES.md) | 🇷🇴 [Română](i18n/ro/FEATURES.md) | 🇵🇱 [Polski](i18n/pl/FEATURES.md) | 🇸🇰 [Slovenčina](i18n/sk/FEATURES.md) | 🇸🇪 [Svenska](i18n/sv/FEATURES.md) | 🇵🇭 [Filipino](i18n/phi/FEATURES.md)
|
||||
|
||||
Visual guide to every section of the OmniRoute dashboard.
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Providers
|
||||
|
||||
Manage AI provider connections: OAuth providers (Claude Code, Codex, Gemini CLI), API key providers (Groq, DeepSeek, OpenRouter), and free providers (iFlow, Qwen, Kiro).
|
||||
|
||||
- **Ollama Cloud** — Cloud-hosted Ollama models at `api.ollama.com` (free "Light usage" tier); use `ollamacloud/<model>` prefix
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
Create model routing combos with 6 strategies: priority, weighted, round-robin, random, least-used, and cost-optimized. Each combo chains multiple models with automatic fallback and includes quick templates and readiness checks.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 📊 Analytics
|
||||
|
||||
Comprehensive usage analytics with token consumption, cost estimates, activity heatmaps, weekly distribution charts, and per-provider breakdowns.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🏥 System Health
|
||||
|
||||
Real-time monitoring: uptime, memory, version, latency percentiles (p50/p95/p99), cache statistics, and provider circuit breaker states.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 Translator Playground
|
||||
|
||||
Four modes for debugging API translations: **Playground** (format converter), **Chat Tester** (live requests), **Test Bench** (batch tests), and **Live Monitor** (real-time stream).
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎮 Model Playground _(v2.0.9+)_
|
||||
|
||||
Test any model directly from the dashboard. Select provider, model, and endpoint, write prompts with Monaco Editor, stream responses in real-time, abort mid-stream, and view timing metrics.
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Themes _(v2.0.5+)_
|
||||
|
||||
Customizable color themes for the entire dashboard. Choose from 7 preset colors (Coral, Blue, Red, Green, Violet, Orange, Cyan) or create a custom theme by picking any hex color. Supports light, dark, and system mode.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Settings
|
||||
|
||||
Comprehensive settings panel with tabs:
|
||||
|
||||
- **General** — System storage, backup management (export/import database)
|
||||
- **Appearance** — Theme selector (dark/light/system), color theme presets and custom colors, health log visibility
|
||||
- **Security** — API endpoint protection, custom provider blocking, IP filtering, session info
|
||||
- **Routing** — Model aliases, background task degradation
|
||||
- **Resilience** — Rate limit persistence, circuit breaker tuning
|
||||
- **Advanced** — Configuration overrides
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Tools
|
||||
|
||||
One-click configuration for AI coding tools: Claude Code, Codex CLI, Gemini CLI, OpenClaw, Kilo Code, Antigravity, Cline, Continue, Cursor, and Factory Droid. Features automated config apply/reset, connection profiles, and model mapping.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🤖 CLI Agents _(v2.0.11+)_
|
||||
|
||||
Dashboard for discovering and managing CLI agents. Shows a grid of 14 built-in agents (Codex, Claude, Goose, Gemini CLI, OpenClaw, Aider, OpenCode, Cline, Qwen Code, ForgeCode, Amazon Q, Open Interpreter, Cursor CLI, Warp) with:
|
||||
|
||||
- **Installation status** — Installed / Not Found with version detection
|
||||
- **Protocol badges** — stdio, HTTP, etc.
|
||||
- **Custom agents** — Register any CLI tool via form (name, binary, version command, spawn args)
|
||||
- **CLI Fingerprint Matching** — Per-provider toggle to match native CLI request signatures, reducing ban risk while preserving proxy IP
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Media _(v2.0.3+)_
|
||||
|
||||
Generate images, videos, and music from the dashboard. Supports OpenAI, xAI, Together, Hyperbolic, SD WebUI, ComfyUI, AnimateDiff, Stable Audio Open, and MusicGen.
|
||||
|
||||
---
|
||||
|
||||
## 📝 Request Logs
|
||||
|
||||
Real-time request logging with filtering by provider, model, account, and API key. Shows status codes, token usage, latency, and response details.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🌐 API Endpoint
|
||||
|
||||
Your unified API endpoint with capability breakdown: Chat Completions, Responses API, Embeddings, Image Generation, Reranking, Audio Transcription, Text-to-Speech, Moderations, and registered API keys. Cloud proxy support for remote access.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔑 API Key Management
|
||||
|
||||
Create, scope, and revoke API keys. Each key can be restricted to specific models/providers with full access or read-only permissions. Visual key management with usage tracking.
|
||||
|
||||
---
|
||||
|
||||
## 📋 Audit Log
|
||||
|
||||
Administrative action tracking with filtering by action type, actor, target, IP address, and timestamp. Full security event history.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application
|
||||
|
||||
Native Electron desktop app for Windows, macOS, and Linux. Run OmniRoute as a standalone application with system tray integration, offline support, auto-update, and one-click install.
|
||||
|
||||
Key features:
|
||||
|
||||
- Server readiness polling (no blank screen on cold start)
|
||||
- System tray with port management
|
||||
- Content Security Policy
|
||||
- Single-instance lock
|
||||
- Auto-update on restart
|
||||
- Platform-conditional UI (macOS traffic lights, Windows/Linux default titlebar)
|
||||
|
||||
📖 See [`electron/README.md`](../electron/README.md) for full documentation.
|
||||
@@ -0,0 +1,87 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/MCP-SERVER.md) · 🇪🇸 [es](../es/MCP-SERVER.md) · 🇫🇷 [fr](../fr/MCP-SERVER.md) · 🇩🇪 [de](../de/MCP-SERVER.md) · 🇮🇹 [it](../it/MCP-SERVER.md) · 🇷🇺 [ru](../ru/MCP-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/MCP-SERVER.md) · 🇯🇵 [ja](../ja/MCP-SERVER.md) · 🇰🇷 [ko](../ko/MCP-SERVER.md) · 🇸🇦 [ar](../ar/MCP-SERVER.md) · 🇮🇳 [in](../in/MCP-SERVER.md) · 🇹🇭 [th](../th/MCP-SERVER.md) · 🇻🇳 [vi](../vi/MCP-SERVER.md) · 🇮🇩 [id](../id/MCP-SERVER.md) · 🇲🇾 [ms](../ms/MCP-SERVER.md) · 🇳🇱 [nl](../nl/MCP-SERVER.md) · 🇵🇱 [pl](../pl/MCP-SERVER.md) · 🇸🇪 [sv](../sv/MCP-SERVER.md) · 🇳🇴 [no](../no/MCP-SERVER.md) · 🇩🇰 [da](../da/MCP-SERVER.md) · 🇫🇮 [fi](../fi/MCP-SERVER.md) · 🇵🇹 [pt](../pt/MCP-SERVER.md) · 🇷🇴 [ro](../ro/MCP-SERVER.md) · 🇭🇺 [hu](../hu/MCP-SERVER.md) · 🇧🇬 [bg](../bg/MCP-SERVER.md) · 🇸🇰 [sk](../sk/MCP-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/MCP-SERVER.md) · 🇮🇱 [he](../he/MCP-SERVER.md) · 🇵🇭 [phi](../phi/MCP-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute MCP Server Documentation
|
||||
|
||||
> Model Context Protocol server with 16 intelligent tools
|
||||
|
||||
## Installation
|
||||
|
||||
OmniRoute MCP is built-in. Start it with:
|
||||
|
||||
```bash
|
||||
omniroute --mcp
|
||||
```
|
||||
|
||||
Or via the open-sse transport:
|
||||
|
||||
```bash
|
||||
# HTTP streamable transport (port 20130)
|
||||
omniroute --dev # MCP auto-starts on /mcp endpoint
|
||||
```
|
||||
|
||||
## IDE Configuration
|
||||
|
||||
See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot, and Claude Desktop setup.
|
||||
|
||||
---
|
||||
|
||||
## Essential Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :------------------------------ | :--------------------------------------- |
|
||||
| `omniroute_get_health` | Gateway health, circuit breakers, uptime |
|
||||
| `omniroute_list_combos` | All configured combos with models |
|
||||
| `omniroute_get_combo_metrics` | Performance metrics for a specific combo |
|
||||
| `omniroute_switch_combo` | Switch active combo by ID/name |
|
||||
| `omniroute_check_quota` | Quota status per provider or all |
|
||||
| `omniroute_route_request` | Send a chat completion through OmniRoute |
|
||||
| `omniroute_cost_report` | Cost analytics for a time period |
|
||||
| `omniroute_list_models_catalog` | Full model catalog with capabilities |
|
||||
|
||||
## Advanced Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :--------------------------------- | :---------------------------------------------- |
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
| `omniroute_get_session_snapshot` | Full session state: costs, tokens, errors |
|
||||
|
||||
## Authentication
|
||||
|
||||
MCP tools are authenticated via API key scopes. Each tool requires specific scopes:
|
||||
|
||||
| Scope | Tools |
|
||||
| :------------- | :----------------------------------------------- |
|
||||
| `read:health` | get_health, get_provider_metrics |
|
||||
| `read:combos` | list_combos, get_combo_metrics |
|
||||
| `write:combos` | switch_combo |
|
||||
| `read:quota` | check_quota |
|
||||
| `write:route` | route_request, simulate_route, test_combo |
|
||||
| `read:usage` | cost_report, get_session_snapshot, explain_route |
|
||||
| `write:config` | set_budget_guard, set_resilience_profile |
|
||||
| `read:models` | list_models_catalog, best_combo_for_task |
|
||||
|
||||
## Audit Logging
|
||||
|
||||
Every tool call is logged to `mcp_tool_audit` with:
|
||||
|
||||
- Tool name, arguments, result
|
||||
- Duration (ms), success/failure
|
||||
- API key hash, timestamp
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------------ |
|
||||
| `open-sse/mcp-server/server.ts` | MCP server creation + 16 tool registrations |
|
||||
| `open-sse/mcp-server/transport.ts` | Stdio + HTTP transport |
|
||||
| `open-sse/mcp-server/auth.ts` | API key + scope validation |
|
||||
| `open-sse/mcp-server/audit.ts` | Tool call audit logging |
|
||||
| `open-sse/mcp-server/tools/advancedTools.ts` | 8 advanced tool handlers |
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,37 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/RELEASE_CHECKLIST.md) · 🇪🇸 [es](../es/RELEASE_CHECKLIST.md) · 🇫🇷 [fr](../fr/RELEASE_CHECKLIST.md) · 🇩🇪 [de](../de/RELEASE_CHECKLIST.md) · 🇮🇹 [it](../it/RELEASE_CHECKLIST.md) · 🇷🇺 [ru](../ru/RELEASE_CHECKLIST.md) · 🇨🇳 [zh-CN](../zh-CN/RELEASE_CHECKLIST.md) · 🇯🇵 [ja](../ja/RELEASE_CHECKLIST.md) · 🇰🇷 [ko](../ko/RELEASE_CHECKLIST.md) · 🇸🇦 [ar](../ar/RELEASE_CHECKLIST.md) · 🇮🇳 [in](../in/RELEASE_CHECKLIST.md) · 🇹🇭 [th](../th/RELEASE_CHECKLIST.md) · 🇻🇳 [vi](../vi/RELEASE_CHECKLIST.md) · 🇮🇩 [id](../id/RELEASE_CHECKLIST.md) · 🇲🇾 [ms](../ms/RELEASE_CHECKLIST.md) · 🇳🇱 [nl](../nl/RELEASE_CHECKLIST.md) · 🇵🇱 [pl](../pl/RELEASE_CHECKLIST.md) · 🇸🇪 [sv](../sv/RELEASE_CHECKLIST.md) · 🇳🇴 [no](../no/RELEASE_CHECKLIST.md) · 🇩🇰 [da](../da/RELEASE_CHECKLIST.md) · 🇫🇮 [fi](../fi/RELEASE_CHECKLIST.md) · 🇵🇹 [pt](../pt/RELEASE_CHECKLIST.md) · 🇷🇴 [ro](../ro/RELEASE_CHECKLIST.md) · 🇭🇺 [hu](../hu/RELEASE_CHECKLIST.md) · 🇧🇬 [bg](../bg/RELEASE_CHECKLIST.md) · 🇸🇰 [sk](../sk/RELEASE_CHECKLIST.md) · 🇺🇦 [uk-UA](../uk-UA/RELEASE_CHECKLIST.md) · 🇮🇱 [he](../he/RELEASE_CHECKLIST.md) · 🇵🇭 [phi](../phi/RELEASE_CHECKLIST.md)
|
||||
|
||||
---
|
||||
|
||||
# Release Checklist
|
||||
|
||||
Use this checklist before tagging or publishing a new OmniRoute release.
|
||||
|
||||
## Version and Changelog
|
||||
|
||||
1. Bump `package.json` version (`x.y.z`) in the release branch.
|
||||
2. Move release notes from `## [Unreleased]` in `CHANGELOG.md` to a dated section:
|
||||
- `## [x.y.z] — YYYY-MM-DD`
|
||||
3. Keep `## [Unreleased]` as the first changelog section for upcoming work.
|
||||
4. Ensure the latest semver section in `CHANGELOG.md` equals `package.json` version.
|
||||
|
||||
## API Docs
|
||||
|
||||
1. Update `docs/openapi.yaml`:
|
||||
- `info.version` must equal `package.json` version.
|
||||
2. Validate endpoint examples if API contracts changed.
|
||||
|
||||
## Runtime Docs
|
||||
|
||||
1. Review `docs/ARCHITECTURE.md` for storage/runtime drift.
|
||||
2. Review `docs/TROUBLESHOOTING.md` for env var and operational drift.
|
||||
3. Update localized docs if source docs changed significantly.
|
||||
|
||||
## Automated Check
|
||||
|
||||
Run the sync guard locally before opening PR:
|
||||
|
||||
```bash
|
||||
npm run check:docs-sync
|
||||
```
|
||||
|
||||
CI also runs this check in `.github/workflows/ci.yml` (lint job).
|
||||
@@ -0,0 +1,258 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/TROUBLESHOOTING.md) · 🇪🇸 [es](../es/TROUBLESHOOTING.md) · 🇫🇷 [fr](../fr/TROUBLESHOOTING.md) · 🇩🇪 [de](../de/TROUBLESHOOTING.md) · 🇮🇹 [it](../it/TROUBLESHOOTING.md) · 🇷🇺 [ru](../ru/TROUBLESHOOTING.md) · 🇨🇳 [zh-CN](../zh-CN/TROUBLESHOOTING.md) · 🇯🇵 [ja](../ja/TROUBLESHOOTING.md) · 🇰🇷 [ko](../ko/TROUBLESHOOTING.md) · 🇸🇦 [ar](../ar/TROUBLESHOOTING.md) · 🇮🇳 [in](../in/TROUBLESHOOTING.md) · 🇹🇭 [th](../th/TROUBLESHOOTING.md) · 🇻🇳 [vi](../vi/TROUBLESHOOTING.md) · 🇮🇩 [id](../id/TROUBLESHOOTING.md) · 🇲🇾 [ms](../ms/TROUBLESHOOTING.md) · 🇳🇱 [nl](../nl/TROUBLESHOOTING.md) · 🇵🇱 [pl](../pl/TROUBLESHOOTING.md) · 🇸🇪 [sv](../sv/TROUBLESHOOTING.md) · 🇳🇴 [no](../no/TROUBLESHOOTING.md) · 🇩🇰 [da](../da/TROUBLESHOOTING.md) · 🇫🇮 [fi](../fi/TROUBLESHOOTING.md) · 🇵🇹 [pt](../pt/TROUBLESHOOTING.md) · 🇷🇴 [ro](../ro/TROUBLESHOOTING.md) · 🇭🇺 [hu](../hu/TROUBLESHOOTING.md) · 🇧🇬 [bg](../bg/TROUBLESHOOTING.md) · 🇸🇰 [sk](../sk/TROUBLESHOOTING.md) · 🇺🇦 [uk-UA](../uk-UA/TROUBLESHOOTING.md) · 🇮🇱 [he](../he/TROUBLESHOOTING.md) · 🇵🇭 [phi](../phi/TROUBLESHOOTING.md)
|
||||
|
||||
---
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](TROUBLESHOOTING.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/TROUBLESHOOTING.md) | 🇪🇸 [Español](i18n/es/TROUBLESHOOTING.md) | 🇫🇷 [Français](i18n/fr/TROUBLESHOOTING.md) | 🇮🇹 [Italiano](i18n/it/TROUBLESHOOTING.md) | 🇷🇺 [Русский](i18n/ru/TROUBLESHOOTING.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/TROUBLESHOOTING.md) | 🇩🇪 [Deutsch](i18n/de/TROUBLESHOOTING.md) | 🇮🇳 [हिन्दी](i18n/in/TROUBLESHOOTING.md) | 🇹🇭 [ไทย](i18n/th/TROUBLESHOOTING.md) | 🇺🇦 [Українська](i18n/uk-UA/TROUBLESHOOTING.md) | 🇸🇦 [العربية](i18n/ar/TROUBLESHOOTING.md) | 🇯🇵 [日本語](i18n/ja/TROUBLESHOOTING.md) | 🇻🇳 [Tiếng Việt](i18n/vi/TROUBLESHOOTING.md) | 🇧🇬 [Български](i18n/bg/TROUBLESHOOTING.md) | 🇩🇰 [Dansk](i18n/da/TROUBLESHOOTING.md) | 🇫🇮 [Suomi](i18n/fi/TROUBLESHOOTING.md) | 🇮🇱 [עברית](i18n/he/TROUBLESHOOTING.md) | 🇭🇺 [Magyar](i18n/hu/TROUBLESHOOTING.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/TROUBLESHOOTING.md) | 🇰🇷 [한국어](i18n/ko/TROUBLESHOOTING.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/TROUBLESHOOTING.md) | 🇳🇱 [Nederlands](i18n/nl/TROUBLESHOOTING.md) | 🇳🇴 [Norsk](i18n/no/TROUBLESHOOTING.md) | 🇵🇹 [Português (Portugal)](i18n/pt/TROUBLESHOOTING.md) | 🇷🇴 [Română](i18n/ro/TROUBLESHOOTING.md) | 🇵🇱 [Polski](i18n/pl/TROUBLESHOOTING.md) | 🇸🇰 [Slovenčina](i18n/sk/TROUBLESHOOTING.md) | 🇸🇪 [Svenska](i18n/sv/TROUBLESHOOTING.md) | 🇵🇭 [Filipino](i18n/phi/TROUBLESHOOTING.md)
|
||||
|
||||
Common problems and solutions for OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Quick Fixes
|
||||
|
||||
| Problem | Solution |
|
||||
| ----------------------------- | ------------------------------------------------------------------ |
|
||||
| First login not working | Set `INITIAL_PASSWORD` in `.env` (no hardcoded default) |
|
||||
| Dashboard opens on wrong port | Set `PORT=20128` and `NEXT_PUBLIC_BASE_URL=http://localhost:20128` |
|
||||
| No request logs under `logs/` | Set `ENABLE_REQUEST_LOGS=true` |
|
||||
| EACCES: permission denied | Set `DATA_DIR=/path/to/writable/dir` to override `~/.omniroute` |
|
||||
| Routing strategy not saving | Update to v1.4.11+ (Zod schema fix for settings persistence) |
|
||||
|
||||
---
|
||||
|
||||
## Provider Issues
|
||||
|
||||
### "Language model did not provide messages"
|
||||
|
||||
**Cause:** Provider quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Check dashboard quota tracker
|
||||
2. Use a combo with fallback tiers
|
||||
3. Switch to cheaper/free tier
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
**Cause:** Subscription quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
- Add fallback: `cc/claude-opus-4-6 → glm/glm-4.7 → if/kimi-k2-thinking`
|
||||
- Use GLM/MiniMax as cheap backup
|
||||
|
||||
### OAuth Token Expired
|
||||
|
||||
OmniRoute auto-refreshes tokens. If issues persist:
|
||||
|
||||
1. Dashboard → Provider → Reconnect
|
||||
2. Delete and re-add the provider connection
|
||||
|
||||
---
|
||||
|
||||
## Cloud Issues
|
||||
|
||||
### Cloud Sync Errors
|
||||
|
||||
1. Verify `BASE_URL` points to your running instance (e.g., `http://localhost:20128`)
|
||||
2. Verify `CLOUD_URL` points to your cloud endpoint (e.g., `https://omniroute.dev`)
|
||||
3. Keep `NEXT_PUBLIC_*` values aligned with server-side values
|
||||
|
||||
### Cloud `stream=false` Returns 500
|
||||
|
||||
**Symptom:** `Unexpected token 'd'...` on cloud endpoint for non-streaming calls.
|
||||
|
||||
**Cause:** Upstream returns SSE payload while client expects JSON.
|
||||
|
||||
**Workaround:** Use `stream=true` for cloud direct calls. Local runtime includes SSE→JSON fallback.
|
||||
|
||||
### Cloud Says Connected but "Invalid API key"
|
||||
|
||||
1. Create a fresh key from local dashboard (`/api/keys`)
|
||||
2. Run cloud sync: Enable Cloud → Sync Now
|
||||
3. Old/non-synced keys can still return `401` on cloud
|
||||
|
||||
---
|
||||
|
||||
## Docker Issues
|
||||
|
||||
### CLI Tool Shows Not Installed
|
||||
|
||||
1. Check runtime fields: `curl http://localhost:20128/api/cli-tools/runtime/codex | jq`
|
||||
2. For portable mode: use image target `runner-cli` (bundled CLIs)
|
||||
3. For host mount mode: set `CLI_EXTRA_PATHS` and mount host bin directory as read-only
|
||||
4. If `installed=true` and `runnable=false`: binary was found but failed healthcheck
|
||||
|
||||
### Quick Runtime Validation
|
||||
|
||||
```bash
|
||||
curl -s http://localhost:20128/api/cli-tools/codex-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/claude-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/openclaw-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cost Issues
|
||||
|
||||
### High Costs
|
||||
|
||||
1. Check usage stats in Dashboard → Usage
|
||||
2. Switch primary model to GLM/MiniMax
|
||||
3. Use free tier (Gemini CLI, iFlow) for non-critical tasks
|
||||
4. Set cost budgets per API key: Dashboard → API Keys → Budget
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
### Enable Request Logs
|
||||
|
||||
Set `ENABLE_REQUEST_LOGS=true` in your `.env` file. Logs appear under `logs/` directory.
|
||||
|
||||
### Check Provider Health
|
||||
|
||||
```bash
|
||||
# Health dashboard
|
||||
http://localhost:20128/dashboard/health
|
||||
|
||||
# API health check
|
||||
curl http://localhost:20128/api/monitoring/health
|
||||
```
|
||||
|
||||
### Runtime Storage
|
||||
|
||||
- Main state: `${DATA_DIR}/storage.sqlite` (providers, combos, aliases, keys, settings)
|
||||
- Usage: SQLite tables in `storage.sqlite` (`usage_history`, `call_logs`, `proxy_logs`) + optional `${DATA_DIR}/log.txt` and `${DATA_DIR}/call_logs/`
|
||||
- Request logs: `<repo>/logs/...` (when `ENABLE_REQUEST_LOGS=true`)
|
||||
|
||||
---
|
||||
|
||||
## Circuit Breaker Issues
|
||||
|
||||
### Provider stuck in OPEN state
|
||||
|
||||
When a provider's circuit breaker is OPEN, requests are blocked until the cooldown expires.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Go to **Dashboard → Settings → Resilience**
|
||||
2. Check the circuit breaker card for the affected provider
|
||||
3. Click **Reset All** to clear all breakers, or wait for the cooldown to expire
|
||||
4. Verify the provider is actually available before resetting
|
||||
|
||||
### Provider keeps tripping the circuit breaker
|
||||
|
||||
If a provider repeatedly enters OPEN state:
|
||||
|
||||
1. Check **Dashboard → Health → Provider Health** for the failure pattern
|
||||
2. Go to **Settings → Resilience → Provider Profiles** and increase the failure threshold
|
||||
3. Check if the provider has changed API limits or requires re-authentication
|
||||
4. Review latency telemetry — high latency may cause timeout-based failures
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription Issues
|
||||
|
||||
### "Unsupported model" error
|
||||
|
||||
- Ensure you're using the correct prefix: `deepgram/nova-3` or `assemblyai/best`
|
||||
- Verify the provider is connected in **Dashboard → Providers**
|
||||
|
||||
### Transcription returns empty or fails
|
||||
|
||||
- Check supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`
|
||||
- Verify file size is within provider limits (typically < 25MB)
|
||||
- Check provider API key validity in the provider card
|
||||
|
||||
---
|
||||
|
||||
## Translator Debugging
|
||||
|
||||
Use **Dashboard → Translator** to debug format translation issues:
|
||||
|
||||
| Mode | When to Use |
|
||||
| ---------------- | -------------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Compare input/output formats side by side — paste a failing request to see how it translates |
|
||||
| **Chat Tester** | Send live messages and inspect the full request/response payload including headers |
|
||||
| **Test Bench** | Run batch tests across format combinations to find which translations are broken |
|
||||
| **Live Monitor** | Watch real-time request flow to catch intermittent translation issues |
|
||||
|
||||
### Common format issues
|
||||
|
||||
- **Thinking tags not appearing** — Check if the target provider supports thinking and the thinking budget setting
|
||||
- **Tool calls dropping** — Some format translations may strip unsupported fields; verify in Playground mode
|
||||
- **System prompt missing** — Claude and Gemini handle system prompts differently; check translation output
|
||||
- **SDK returns raw string instead of object** — Fixed in v1.1.0: response sanitizer now strips non-standard fields (`x_groq`, `usage_breakdown`, etc.) that cause OpenAI SDK Pydantic validation failures
|
||||
- **GLM/ERNIE rejects `system` role** — Fixed in v1.1.0: role normalizer automatically merges system messages into user messages for incompatible models
|
||||
- **`developer` role not recognized** — Fixed in v1.1.0: automatically converted to `system` for non-OpenAI providers
|
||||
- **`json_schema` not working with Gemini** — Fixed in v1.1.0: `response_format` is now converted to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
---
|
||||
|
||||
## Resilience Settings
|
||||
|
||||
### Auto rate-limit not triggering
|
||||
|
||||
- Auto rate-limit only applies to API key providers (not OAuth/subscription)
|
||||
- Verify **Settings → Resilience → Provider Profiles** has auto-rate-limit enabled
|
||||
- Check if the provider returns `429` status codes or `Retry-After` headers
|
||||
|
||||
### Tuning exponential backoff
|
||||
|
||||
Provider profiles support these settings:
|
||||
|
||||
- **Base delay** — Initial wait time after first failure (default: 1s)
|
||||
- **Max delay** — Maximum wait time cap (default: 30s)
|
||||
- **Multiplier** — How much to increase delay per consecutive failure (default: 2x)
|
||||
|
||||
### Anti-thundering herd
|
||||
|
||||
When many concurrent requests hit a rate-limited provider, OmniRoute uses mutex + auto rate-limiting to serialize requests and prevent cascading failures. This is automatic for API key providers.
|
||||
|
||||
---
|
||||
|
||||
## Optional RAG / LLM failure taxonomy (16 problems)
|
||||
|
||||
Some OmniRoute users place the gateway in front of RAG or agent stacks. In those setups it is common to see a strange pattern: OmniRoute looks healthy (providers up, routing profiles ok, no rate limit alerts) but the final answer is still wrong.
|
||||
|
||||
In practice these incidents usually come from the downstream RAG pipeline, not from the gateway itself.
|
||||
|
||||
If you want a shared vocabulary to describe those failures you can use the WFGY ProblemMap, an external MIT license text resource that defines sixteen recurring RAG / LLM failure patterns. At a high level it covers:
|
||||
|
||||
- retrieval drift and broken context boundaries
|
||||
- empty or stale indexes and vector stores
|
||||
- embedding versus semantic mismatch
|
||||
- prompt assembly and context window issues
|
||||
- logic collapse and overconfident answers
|
||||
- long chain and agent coordination failures
|
||||
- multi agent memory and role drift
|
||||
- deployment and bootstrap ordering problems
|
||||
|
||||
The idea is simple:
|
||||
|
||||
1. When you investigate a bad response, capture:
|
||||
- user task and request
|
||||
- route or provider combo in OmniRoute
|
||||
- any RAG context used downstream (retrieved documents, tool calls, etc)
|
||||
2. Map the incident to one or two WFGY ProblemMap numbers (`No.1` … `No.16`).
|
||||
3. Store the number in your own dashboard, runbook, or incident tracker next to the OmniRoute logs.
|
||||
4. Use the corresponding WFGY page to decide whether you need to change your RAG stack, retriever, or routing strategy.
|
||||
|
||||
Full text and concrete recipes live here (MIT license, text only):
|
||||
|
||||
[WFGY ProblemMap README](https://github.com/onestardao/WFGY/blob/main/ProblemMap/README.md)
|
||||
|
||||
You can ignore this section if you do not run RAG or agent pipelines behind OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Still Stuck?
|
||||
|
||||
- **GitHub Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **Architecture**: See [`docs/ARCHITECTURE.md`](ARCHITECTURE.md) for internal details
|
||||
- **API Reference**: See [`docs/API_REFERENCE.md`](API_REFERENCE.md) for all endpoints
|
||||
- **Health Dashboard**: Check **Dashboard → Health** for real-time system status
|
||||
- **Translator**: Use **Dashboard → Translator** to debug format issues
|
||||
@@ -0,0 +1,813 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/USER_GUIDE.md) · 🇪🇸 [es](../es/USER_GUIDE.md) · 🇫🇷 [fr](../fr/USER_GUIDE.md) · 🇩🇪 [de](../de/USER_GUIDE.md) · 🇮🇹 [it](../it/USER_GUIDE.md) · 🇷🇺 [ru](../ru/USER_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/USER_GUIDE.md) · 🇯🇵 [ja](../ja/USER_GUIDE.md) · 🇰🇷 [ko](../ko/USER_GUIDE.md) · 🇸🇦 [ar](../ar/USER_GUIDE.md) · 🇮🇳 [in](../in/USER_GUIDE.md) · 🇹🇭 [th](../th/USER_GUIDE.md) · 🇻🇳 [vi](../vi/USER_GUIDE.md) · 🇮🇩 [id](../id/USER_GUIDE.md) · 🇲🇾 [ms](../ms/USER_GUIDE.md) · 🇳🇱 [nl](../nl/USER_GUIDE.md) · 🇵🇱 [pl](../pl/USER_GUIDE.md) · 🇸🇪 [sv](../sv/USER_GUIDE.md) · 🇳🇴 [no](../no/USER_GUIDE.md) · 🇩🇰 [da](../da/USER_GUIDE.md) · 🇫🇮 [fi](../fi/USER_GUIDE.md) · 🇵🇹 [pt](../pt/USER_GUIDE.md) · 🇷🇴 [ro](../ro/USER_GUIDE.md) · 🇭🇺 [hu](../hu/USER_GUIDE.md) · 🇧🇬 [bg](../bg/USER_GUIDE.md) · 🇸🇰 [sk](../sk/USER_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/USER_GUIDE.md) · 🇮🇱 [he](../he/USER_GUIDE.md) · 🇵🇭 [phi](../phi/USER_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# User Guide
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](USER_GUIDE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/USER_GUIDE.md) | 🇪🇸 [Español](i18n/es/USER_GUIDE.md) | 🇫🇷 [Français](i18n/fr/USER_GUIDE.md) | 🇮🇹 [Italiano](i18n/it/USER_GUIDE.md) | 🇷🇺 [Русский](i18n/ru/USER_GUIDE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/USER_GUIDE.md) | 🇩🇪 [Deutsch](i18n/de/USER_GUIDE.md) | 🇮🇳 [हिन्दी](i18n/in/USER_GUIDE.md) | 🇹🇭 [ไทย](i18n/th/USER_GUIDE.md) | 🇺🇦 [Українська](i18n/uk-UA/USER_GUIDE.md) | 🇸🇦 [العربية](i18n/ar/USER_GUIDE.md) | 🇯🇵 [日本語](i18n/ja/USER_GUIDE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/USER_GUIDE.md) | 🇧🇬 [Български](i18n/bg/USER_GUIDE.md) | 🇩🇰 [Dansk](i18n/da/USER_GUIDE.md) | 🇫🇮 [Suomi](i18n/fi/USER_GUIDE.md) | 🇮🇱 [עברית](i18n/he/USER_GUIDE.md) | 🇭🇺 [Magyar](i18n/hu/USER_GUIDE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/USER_GUIDE.md) | 🇰🇷 [한국어](i18n/ko/USER_GUIDE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/USER_GUIDE.md) | 🇳🇱 [Nederlands](i18n/nl/USER_GUIDE.md) | 🇳🇴 [Norsk](i18n/no/USER_GUIDE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/USER_GUIDE.md) | 🇷🇴 [Română](i18n/ro/USER_GUIDE.md) | 🇵🇱 [Polski](i18n/pl/USER_GUIDE.md) | 🇸🇰 [Slovenčina](i18n/sk/USER_GUIDE.md) | 🇸🇪 [Svenska](i18n/sv/USER_GUIDE.md) | 🇵🇭 [Filipino](i18n/phi/USER_GUIDE.md)
|
||||
|
||||
Complete guide for configuring providers, creating combos, integrating CLI tools, and deploying OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Pricing at a Glance](#-pricing-at-a-glance)
|
||||
- [Use Cases](#-use-cases)
|
||||
- [Provider Setup](#-provider-setup)
|
||||
- [CLI Integration](#-cli-integration)
|
||||
- [Deployment](#-deployment)
|
||||
- [Available Models](#-available-models)
|
||||
- [Advanced Features](#-advanced-features)
|
||||
|
||||
---
|
||||
|
||||
## 💰 Pricing at a Glance
|
||||
|
||||
| Tier | Provider | Cost | Quota Reset | Best For |
|
||||
| ------------------- | ----------------- | ----------- | ---------------- | -------------------- |
|
||||
| **💳 SUBSCRIPTION** | Claude Code (Pro) | $20/mo | 5h + weekly | Already subscribed |
|
||||
| | Codex (Plus/Pro) | $20-200/mo | 5h + weekly | OpenAI users |
|
||||
| | Gemini CLI | **FREE** | 180K/mo + 1K/day | Everyone! |
|
||||
| | GitHub Copilot | $10-19/mo | Monthly | GitHub users |
|
||||
| **🔑 API KEY** | DeepSeek | Pay per use | None | Cheap reasoning |
|
||||
| | Groq | Pay per use | None | Ultra-fast inference |
|
||||
| | xAI (Grok) | Pay per use | None | Grok 4 reasoning |
|
||||
| | Mistral | Pay per use | None | EU-hosted models |
|
||||
| | Perplexity | Pay per use | None | Search-augmented |
|
||||
| | Together AI | Pay per use | None | Open-source models |
|
||||
| | Fireworks AI | Pay per use | None | Fast FLUX images |
|
||||
| | Cerebras | Pay per use | None | Wafer-scale speed |
|
||||
| | Cohere | Pay per use | None | Command R+ RAG |
|
||||
| | NVIDIA NIM | Pay per use | None | Enterprise models |
|
||||
| **💰 CHEAP** | GLM-4.7 | $0.6/1M | Daily 10AM | Budget backup |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5-hour rolling | Cheapest option |
|
||||
| | Kimi K2 | $9/mo flat | 10M tokens/mo | Predictable cost |
|
||||
| **🆓 FREE** | iFlow | $0 | Unlimited | 8 models free |
|
||||
| | Qwen | $0 | Unlimited | 3 models free |
|
||||
| | Kiro | $0 | Unlimited | Claude free |
|
||||
|
||||
**💡 Pro Tip:** Start with Gemini CLI (180K free/month) + iFlow (unlimited free) combo = $0 cost!
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Use Cases
|
||||
|
||||
### Case 1: "I have Claude Pro subscription"
|
||||
|
||||
**Problem:** Quota expires unused, rate limits during heavy coding
|
||||
|
||||
```
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (use subscription fully)
|
||||
2. glm/glm-4.7 (cheap backup when quota out)
|
||||
3. if/kimi-k2-thinking (free emergency fallback)
|
||||
|
||||
Monthly cost: $20 (subscription) + ~$5 (backup) = $25 total
|
||||
vs. $20 + hitting limits = frustration
|
||||
```
|
||||
|
||||
### Case 2: "I want zero cost"
|
||||
|
||||
**Problem:** Can't afford subscriptions, need reliable AI coding
|
||||
|
||||
```
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited free)
|
||||
3. qw/qwen3-coder-plus (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Quality: Production-ready models
|
||||
```
|
||||
|
||||
### Case 3: "I need 24/7 coding, no interruptions"
|
||||
|
||||
**Problem:** Deadlines, can't afford downtime
|
||||
|
||||
```
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6 (best quality)
|
||||
2. cx/gpt-5.2-codex (second subscription)
|
||||
3. glm/glm-4.7 (cheap, resets daily)
|
||||
4. minimax/MiniMax-M2.1 (cheapest, 5h reset)
|
||||
5. if/kimi-k2-thinking (free unlimited)
|
||||
|
||||
Result: 5 layers of fallback = zero downtime
|
||||
Monthly cost: $20-200 (subscriptions) + $10-20 (backup)
|
||||
```
|
||||
|
||||
### Case 4: "I want FREE AI in OpenClaw"
|
||||
|
||||
**Problem:** Need AI assistant in messaging apps, completely free
|
||||
|
||||
```
|
||||
Combo: "openclaw-free"
|
||||
1. if/glm-4.7 (unlimited free)
|
||||
2. if/minimax-m2.1 (unlimited free)
|
||||
3. if/kimi-k2-thinking (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Access via: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📖 Provider Setup
|
||||
|
||||
### 🔐 Subscription Providers
|
||||
|
||||
#### Claude Code (Pro/Max)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Claude Code
|
||||
→ OAuth login → Auto token refresh
|
||||
→ 5-hour + weekly quota tracking
|
||||
|
||||
Models:
|
||||
cc/claude-opus-4-6
|
||||
cc/claude-sonnet-4-5-20250929
|
||||
cc/claude-haiku-4-5-20251001
|
||||
```
|
||||
|
||||
**Pro Tip:** Use Opus for complex tasks, Sonnet for speed. OmniRoute tracks quota per model!
|
||||
|
||||
#### OpenAI Codex (Plus/Pro)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Codex
|
||||
→ OAuth login (port 1455)
|
||||
→ 5-hour + weekly reset
|
||||
|
||||
Models:
|
||||
cx/gpt-5.2-codex
|
||||
cx/gpt-5.1-codex-max
|
||||
```
|
||||
|
||||
#### Gemini CLI (FREE 180K/month!)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Gemini CLI
|
||||
→ Google OAuth
|
||||
→ 180K completions/month + 1K/day
|
||||
|
||||
Models:
|
||||
gc/gemini-3-flash-preview
|
||||
gc/gemini-2.5-pro
|
||||
```
|
||||
|
||||
**Best Value:** Huge free tier! Use this before paid tiers.
|
||||
|
||||
#### GitHub Copilot
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect GitHub
|
||||
→ OAuth via GitHub
|
||||
→ Monthly reset (1st of month)
|
||||
|
||||
Models:
|
||||
gh/gpt-5
|
||||
gh/claude-4.5-sonnet
|
||||
gh/gemini-3-pro
|
||||
```
|
||||
|
||||
### 💰 Cheap Providers
|
||||
|
||||
#### GLM-4.7 (Daily reset, $0.6/1M)
|
||||
|
||||
1. Sign up: [Zhipu AI](https://open.bigmodel.cn/)
|
||||
2. Get API key from Coding Plan
|
||||
3. Dashboard → Add API Key: Provider: `glm`, API Key: `your-key`
|
||||
|
||||
**Use:** `glm/glm-4.7` — **Pro Tip:** Coding Plan offers 3× quota at 1/7 cost! Reset daily 10:00 AM.
|
||||
|
||||
#### MiniMax M2.1 (5h reset, $0.20/1M)
|
||||
|
||||
1. Sign up: [MiniMax](https://www.minimax.io/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `minimax/MiniMax-M2.1` — **Pro Tip:** Cheapest option for long context (1M tokens)!
|
||||
|
||||
#### Kimi K2 ($9/month flat)
|
||||
|
||||
1. Subscribe: [Moonshot AI](https://platform.moonshot.ai/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `kimi/kimi-latest` — **Pro Tip:** Fixed $9/month for 10M tokens = $0.90/1M effective cost!
|
||||
|
||||
### 🆓 FREE Providers
|
||||
|
||||
#### iFlow (8 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect iFlow → OAuth login → Unlimited usage
|
||||
|
||||
Models: if/kimi-k2-thinking, if/qwen3-coder-plus, if/glm-4.7, if/minimax-m2, if/deepseek-r1
|
||||
```
|
||||
|
||||
#### Qwen (3 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Qwen → Device code auth → Unlimited usage
|
||||
|
||||
Models: qw/qwen3-coder-plus, qw/qwen3-coder-flash
|
||||
```
|
||||
|
||||
#### Kiro (Claude FREE)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Kiro → AWS Builder ID or Google/GitHub → Unlimited
|
||||
|
||||
Models: kr/claude-sonnet-4.5, kr/claude-haiku-4.5
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
### Example 1: Maximize Subscription → Cheap Backup
|
||||
|
||||
```
|
||||
Dashboard → Combos → Create New
|
||||
|
||||
Name: premium-coding
|
||||
Models:
|
||||
1. cc/claude-opus-4-6 (Subscription primary)
|
||||
2. glm/glm-4.7 (Cheap backup, $0.6/1M)
|
||||
3. minimax/MiniMax-M2.1 (Cheapest fallback, $0.20/1M)
|
||||
|
||||
Use in CLI: premium-coding
|
||||
```
|
||||
|
||||
### Example 2: Free-Only (Zero Cost)
|
||||
|
||||
```
|
||||
Name: free-combo
|
||||
Models:
|
||||
1. gc/gemini-3-flash-preview (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited)
|
||||
3. qw/qwen3-coder-plus (unlimited)
|
||||
|
||||
Cost: $0 forever!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Integration
|
||||
|
||||
### Cursor IDE
|
||||
|
||||
```
|
||||
Settings → Models → Advanced:
|
||||
OpenAI API Base URL: http://localhost:20128/v1
|
||||
OpenAI API Key: [from omniroute dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
Edit `~/.claude/config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"anthropic_api_base": "http://localhost:20128/v1",
|
||||
"anthropic_api_key": "your-omniroute-api-key"
|
||||
}
|
||||
```
|
||||
|
||||
### Codex CLI
|
||||
|
||||
```bash
|
||||
export OPENAI_BASE_URL="http://localhost:20128"
|
||||
export OPENAI_API_KEY="your-omniroute-api-key"
|
||||
codex "your prompt"
|
||||
```
|
||||
|
||||
### OpenClaw
|
||||
|
||||
Edit `~/.openclaw/openclaw.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": { "primary": "omniroute/if/glm-4.7" }
|
||||
}
|
||||
},
|
||||
"models": {
|
||||
"providers": {
|
||||
"omniroute": {
|
||||
"baseUrl": "http://localhost:20128/v1",
|
||||
"apiKey": "your-omniroute-api-key",
|
||||
"api": "openai-completions",
|
||||
"models": [{ "id": "if/glm-4.7", "name": "glm-4.7" }]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Or use Dashboard:** CLI Tools → OpenClaw → Auto-config
|
||||
|
||||
### Cline / Continue / RooCode
|
||||
|
||||
```
|
||||
Provider: OpenAI Compatible
|
||||
Base URL: http://localhost:20128/v1
|
||||
API Key: [from dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Deployment
|
||||
|
||||
### Global npm install (Recommended)
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
|
||||
# Create config directory
|
||||
mkdir -p ~/.omniroute
|
||||
|
||||
# Create .env file (see .env.example)
|
||||
cp .env.example ~/.omniroute/.env
|
||||
|
||||
# Start server
|
||||
omniroute
|
||||
# Or with custom port:
|
||||
omniroute --port 3000
|
||||
```
|
||||
|
||||
The CLI automatically loads `.env` from `~/.omniroute/.env` or `./.env`.
|
||||
|
||||
### VPS Deployment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/diegosouzapw/OmniRoute.git
|
||||
cd OmniRoute && npm install && npm run build
|
||||
|
||||
export JWT_SECRET="your-secure-secret-change-this"
|
||||
export INITIAL_PASSWORD="your-password"
|
||||
export DATA_DIR="/var/lib/omniroute"
|
||||
export PORT="20128"
|
||||
export HOSTNAME="0.0.0.0"
|
||||
export NODE_ENV="production"
|
||||
export NEXT_PUBLIC_BASE_URL="http://localhost:20128"
|
||||
export API_KEY_SECRET="endpoint-proxy-api-key-secret"
|
||||
|
||||
npm run start
|
||||
# Or: pm2 start npm --name omniroute -- start
|
||||
```
|
||||
|
||||
### PM2 Deployment (Low Memory)
|
||||
|
||||
For servers with limited RAM, use the memory limit option:
|
||||
|
||||
```bash
|
||||
# With 512MB limit (default)
|
||||
pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or with custom memory limit
|
||||
OMNIROUTE_MEMORY_MB=512 pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or using ecosystem.config.js
|
||||
pm2 start ecosystem.config.js
|
||||
```
|
||||
|
||||
Create `ecosystem.config.js`:
|
||||
|
||||
```javascript
|
||||
module.exports = {
|
||||
apps: [
|
||||
{
|
||||
name: "omniroute",
|
||||
script: "npm",
|
||||
args: "start",
|
||||
env: {
|
||||
NODE_ENV: "production",
|
||||
OMNIROUTE_MEMORY_MB: "512",
|
||||
JWT_SECRET: "your-secret",
|
||||
INITIAL_PASSWORD: "your-password",
|
||||
},
|
||||
node_args: "--max-old-space-size=512",
|
||||
max_memory_restart: "300M",
|
||||
},
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
# Build image (default = runner-cli with codex/claude/droid preinstalled)
|
||||
docker build -t omniroute:cli .
|
||||
|
||||
# Portable mode (recommended)
|
||||
docker run -d --name omniroute -p 20128:20128 --env-file ./.env -v omniroute-data:/app/data omniroute:cli
|
||||
```
|
||||
|
||||
For host-integrated mode with CLI binaries, see the Docker section in the main docs.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| ------------------------- | ------------------------------------ | ------------------------------------------------------- |
|
||||
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
|
||||
| `INITIAL_PASSWORD` | `123456` | First login password |
|
||||
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
|
||||
| `PORT` | framework default | Service port (`20128` in examples) |
|
||||
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
|
||||
| `NODE_ENV` | runtime default | Set `production` for deploy |
|
||||
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
|
||||
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
|
||||
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
|
||||
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
|
||||
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
|
||||
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit in MB |
|
||||
| `PROMPT_CACHE_MAX_SIZE` | `50` | Max prompt cache entries |
|
||||
| `SEMANTIC_CACHE_MAX_SIZE` | `100` | Max semantic cache entries |
|
||||
|
||||
For the full environment variable reference, see the [README](../README.md).
|
||||
|
||||
---
|
||||
|
||||
## 📊 Available Models
|
||||
|
||||
<details>
|
||||
<summary><b>View all available models</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** — Pro/Max: `cc/claude-opus-4-6`, `cc/claude-sonnet-4-5-20250929`, `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** — Plus/Pro: `cx/gpt-5.2-codex`, `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** — FREE: `gc/gemini-3-flash-preview`, `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)**: `gh/gpt-5`, `gh/claude-4.5-sonnet`
|
||||
|
||||
**GLM (`glm/`)** — $0.6/1M: `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** — $0.2/1M: `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** — FREE: `if/kimi-k2-thinking`, `if/qwen3-coder-plus`, `if/deepseek-r1`
|
||||
|
||||
**Qwen (`qw/`)** — FREE: `qw/qwen3-coder-plus`, `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** — FREE: `kr/claude-sonnet-4.5`, `kr/claude-haiku-4.5`
|
||||
|
||||
**DeepSeek (`ds/`)**: `ds/deepseek-chat`, `ds/deepseek-reasoner`
|
||||
|
||||
**Groq (`groq/`)**: `groq/llama-3.3-70b-versatile`, `groq/llama-4-maverick-17b-128e-instruct`
|
||||
|
||||
**xAI (`xai/`)**: `xai/grok-4`, `xai/grok-4-0709-fast-reasoning`, `xai/grok-code-mini`
|
||||
|
||||
**Mistral (`mistral/`)**: `mistral/mistral-large-2501`, `mistral/codestral-2501`
|
||||
|
||||
**Perplexity (`pplx/`)**: `pplx/sonar-pro`, `pplx/sonar`
|
||||
|
||||
**Together AI (`together/`)**: `together/meta-llama/Llama-3.3-70B-Instruct-Turbo`
|
||||
|
||||
**Fireworks AI (`fireworks/`)**: `fireworks/accounts/fireworks/models/deepseek-v3p1`
|
||||
|
||||
**Cerebras (`cerebras/`)**: `cerebras/llama-3.3-70b`
|
||||
|
||||
**Cohere (`cohere/`)**: `cohere/command-r-plus-08-2024`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)**: `nvidia/nvidia/llama-3.3-70b-instruct`
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Advanced Features
|
||||
|
||||
### Custom Models
|
||||
|
||||
Add any model ID to any provider without waiting for an app update:
|
||||
|
||||
```bash
|
||||
# Via API
|
||||
curl -X POST http://localhost:20128/api/provider-models \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"provider": "openai", "modelId": "gpt-4.5-preview", "modelName": "GPT-4.5 Preview"}'
|
||||
|
||||
# List: curl http://localhost:20128/api/provider-models?provider=openai
|
||||
# Remove: curl -X DELETE "http://localhost:20128/api/provider-models?provider=openai&model=gpt-4.5-preview"
|
||||
```
|
||||
|
||||
Or use Dashboard: **Providers → [Provider] → Custom Models**.
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
Route requests directly to a specific provider with model validation:
|
||||
|
||||
```bash
|
||||
POST http://localhost:20128/v1/providers/openai/chat/completions
|
||||
POST http://localhost:20128/v1/providers/openai/embeddings
|
||||
POST http://localhost:20128/v1/providers/fireworks/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
### Network Proxy Configuration
|
||||
|
||||
```bash
|
||||
# Set global proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"global": {"type":"http","host":"proxy.example.com","port":"8080"}}'
|
||||
|
||||
# Per-provider proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"providers": {"openai": {"type":"socks5","host":"proxy.example.com","port":"1080"}}}'
|
||||
|
||||
# Test proxy
|
||||
curl -X POST http://localhost:20128/api/settings/proxy/test \
|
||||
-d '{"proxy":{"type":"socks5","host":"proxy.example.com","port":"1080"}}'
|
||||
```
|
||||
|
||||
**Precedence:** Key-specific → Combo-specific → Provider-specific → Global → Environment.
|
||||
|
||||
### Model Catalog API
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/api/models/catalog
|
||||
```
|
||||
|
||||
Returns models grouped by provider with types (`chat`, `embedding`, `image`).
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
- Sync providers, combos, and settings across devices
|
||||
- Automatic background sync with timeout + fail-fast
|
||||
- Prefer server-side `BASE_URL`/`CLOUD_URL` in production
|
||||
|
||||
### LLM Gateway Intelligence (Phase 9)
|
||||
|
||||
- **Semantic Cache** — Auto-caches non-streaming, temperature=0 responses (bypass with `X-OmniRoute-No-Cache: true`)
|
||||
- **Request Idempotency** — Deduplicates requests within 5s via `Idempotency-Key` or `X-Request-Id` header
|
||||
- **Progress Tracking** — Opt-in SSE `event: progress` events via `X-OmniRoute-Progress: true` header
|
||||
|
||||
---
|
||||
|
||||
### Translator Playground
|
||||
|
||||
Access via **Dashboard → Translator**. Debug and visualize how OmniRoute translates API requests between providers.
|
||||
|
||||
| Mode | Purpose |
|
||||
| ---------------- | -------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Select source/target formats, paste a request, and see the translated output instantly |
|
||||
| **Chat Tester** | Send live chat messages through the proxy and inspect the full request/response cycle |
|
||||
| **Test Bench** | Run batch tests across multiple format combinations to verify translation correctness |
|
||||
| **Live Monitor** | Watch real-time translations as requests flow through the proxy |
|
||||
|
||||
**Use cases:**
|
||||
|
||||
- Debug why a specific client/provider combination fails
|
||||
- Verify that thinking tags, tool calls, and system prompts translate correctly
|
||||
- Compare format differences between OpenAI, Claude, Gemini, and Responses API formats
|
||||
|
||||
---
|
||||
|
||||
### Routing Strategies
|
||||
|
||||
Configure via **Dashboard → Settings → Routing**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------------------ | ------------------------------------------------------------------------------------------------ |
|
||||
| **Fill First** | Uses accounts in priority order — primary account handles all requests until unavailable |
|
||||
| **Round Robin** | Cycles through all accounts with a configurable sticky limit (default: 3 calls per account) |
|
||||
| **P2C (Power of Two Choices)** | Picks 2 random accounts and routes to the healthier one — balances load with awareness of health |
|
||||
| **Random** | Randomly selects an account for each request using Fisher-Yates shuffle |
|
||||
| **Least Used** | Routes to the account with the oldest `lastUsedAt` timestamp, distributing traffic evenly |
|
||||
| **Cost Optimized** | Routes to the account with the lowest priority value, optimizing for lowest-cost providers |
|
||||
|
||||
#### Wildcard Model Aliases
|
||||
|
||||
Create wildcard patterns to remap model names:
|
||||
|
||||
```
|
||||
Pattern: claude-sonnet-* → Target: cc/claude-sonnet-4-5-20250929
|
||||
Pattern: gpt-* → Target: gh/gpt-5.1-codex
|
||||
```
|
||||
|
||||
Wildcards support `*` (any characters) and `?` (single character).
|
||||
|
||||
#### Fallback Chains
|
||||
|
||||
Define global fallback chains that apply across all requests:
|
||||
|
||||
```
|
||||
Chain: production-fallback
|
||||
1. cc/claude-opus-4-6
|
||||
2. gh/gpt-5.1-codex
|
||||
3. glm/glm-4.7
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Resilience & Circuit Breakers
|
||||
|
||||
Configure via **Dashboard → Settings → Resilience**.
|
||||
|
||||
OmniRoute implements provider-level resilience with four components:
|
||||
|
||||
1. **Provider Profiles** — Per-provider configuration for:
|
||||
- Failure threshold (how many failures before opening)
|
||||
- Cooldown duration
|
||||
- Rate limit detection sensitivity
|
||||
- Exponential backoff parameters
|
||||
|
||||
2. **Editable Rate Limits** — System-level defaults configurable in the dashboard:
|
||||
- **Requests Per Minute (RPM)** — Maximum requests per minute per account
|
||||
- **Min Time Between Requests** — Minimum gap in milliseconds between requests
|
||||
- **Max Concurrent Requests** — Maximum simultaneous requests per account
|
||||
- Click **Edit** to modify, then **Save** or **Cancel**. Values persist via the resilience API.
|
||||
|
||||
3. **Circuit Breaker** — Tracks failures per provider and automatically opens the circuit when a threshold is reached:
|
||||
- **CLOSED** (Healthy) — Requests flow normally
|
||||
- **OPEN** — Provider is temporarily blocked after repeated failures
|
||||
- **HALF_OPEN** — Testing if provider has recovered
|
||||
|
||||
4. **Policies & Locked Identifiers** — Shows circuit breaker status and locked identifiers with force-unlock capability.
|
||||
|
||||
5. **Rate Limit Auto-Detection** — Monitors `429` and `Retry-After` headers to proactively avoid hitting provider rate limits.
|
||||
|
||||
**Pro Tip:** Use **Reset All** button to clear all circuit breakers and cooldowns when a provider recovers from an outage.
|
||||
|
||||
---
|
||||
|
||||
### Database Export / Import
|
||||
|
||||
Manage database backups in **Dashboard → Settings → System & Storage**.
|
||||
|
||||
| Action | Description |
|
||||
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| **Export Database** | Downloads the current SQLite database as a `.sqlite` file |
|
||||
| **Export All (.tar.gz)** | Downloads a full backup archive including: database, settings, combos, provider connections (no credentials), API key metadata |
|
||||
| **Import Database** | Upload a `.sqlite` file to replace the current database. A pre-import backup is automatically created |
|
||||
|
||||
```bash
|
||||
# API: Export database
|
||||
curl -o backup.sqlite http://localhost:20128/api/db-backups/export
|
||||
|
||||
# API: Export all (full archive)
|
||||
curl -o backup.tar.gz http://localhost:20128/api/db-backups/exportAll
|
||||
|
||||
# API: Import database
|
||||
curl -X POST http://localhost:20128/api/db-backups/import \
|
||||
-F "file=@backup.sqlite"
|
||||
```
|
||||
|
||||
**Import Validation:** The imported file is validated for integrity (SQLite pragma check), required tables (`provider_connections`, `provider_nodes`, `combos`, `api_keys`), and size (max 100MB).
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Migrate OmniRoute between machines
|
||||
- Create external backups for disaster recovery
|
||||
- Share configurations between team members (export all → share archive)
|
||||
|
||||
---
|
||||
|
||||
### Settings Dashboard
|
||||
|
||||
The settings page is organized into 5 tabs for easy navigation:
|
||||
|
||||
| Tab | Contents |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| **Security** | Login/Password settings, IP Access Control, API auth for `/models`, and Provider Blocking |
|
||||
| **Routing** | Global routing strategy (6 options), wildcard model aliases, fallback chains, combo defaults |
|
||||
| **Resilience** | Provider profiles, editable rate limits, circuit breaker status, policies & locked identifiers |
|
||||
| **AI** | Thinking budget configuration, global system prompt injection, prompt cache stats |
|
||||
| **Advanced** | Global proxy configuration (HTTP/SOCKS5) |
|
||||
|
||||
---
|
||||
|
||||
### Costs & Budget Management
|
||||
|
||||
Access via **Dashboard → Costs**.
|
||||
|
||||
| Tab | Purpose |
|
||||
| ----------- | ---------------------------------------------------------------------------------------- |
|
||||
| **Budget** | Set spending limits per API key with daily/weekly/monthly budgets and real-time tracking |
|
||||
| **Pricing** | View and edit model pricing entries — cost per 1K input/output tokens per provider |
|
||||
|
||||
```bash
|
||||
# API: Set a budget
|
||||
curl -X POST http://localhost:20128/api/usage/budget \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"keyId": "key-123", "limit": 50.00, "period": "monthly"}'
|
||||
|
||||
# API: Get current budget status
|
||||
curl http://localhost:20128/api/usage/budget
|
||||
```
|
||||
|
||||
**Cost Tracking:** Every request logs token usage and calculates cost using the pricing table. View breakdowns in **Dashboard → Usage** by provider, model, and API key.
|
||||
|
||||
---
|
||||
|
||||
### Audio Transcription
|
||||
|
||||
OmniRoute supports audio transcription via the OpenAI-compatible endpoint:
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
|
||||
# Example with curl
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@audio.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
Available providers: **Deepgram** (`deepgram/`), **AssemblyAI** (`assemblyai/`).
|
||||
|
||||
Supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
### Combo Balancing Strategies
|
||||
|
||||
Configure per-combo balancing in **Dashboard → Combos → Create/Edit → Strategy**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------ | ------------------------------------------------------------------------ |
|
||||
| **Round-Robin** | Rotates through models sequentially |
|
||||
| **Priority** | Always tries the first model; falls back only on error |
|
||||
| **Random** | Picks a random model from the combo for each request |
|
||||
| **Weighted** | Routes proportionally based on assigned weights per model |
|
||||
| **Least-Used** | Routes to the model with the fewest recent requests (uses combo metrics) |
|
||||
| **Cost-Optimized** | Routes to the cheapest available model (uses pricing table) |
|
||||
|
||||
Global combo defaults can be set in **Dashboard → Settings → Routing → Combo Defaults**.
|
||||
|
||||
---
|
||||
|
||||
### Health Dashboard
|
||||
|
||||
Access via **Dashboard → Health**. Real-time system health overview with 6 cards:
|
||||
|
||||
| Card | What It Shows |
|
||||
| --------------------- | ----------------------------------------------------------- |
|
||||
| **System Status** | Uptime, version, memory usage, data directory |
|
||||
| **Provider Health** | Per-provider circuit breaker state (Closed/Open/Half-Open) |
|
||||
| **Rate Limits** | Active rate limit cooldowns per account with remaining time |
|
||||
| **Active Lockouts** | Providers temporarily blocked by the lockout policy |
|
||||
| **Signature Cache** | Deduplication cache stats (active keys, hit rate) |
|
||||
| **Latency Telemetry** | p50/p95/p99 latency aggregation per provider |
|
||||
|
||||
**Pro Tip:** The Health page auto-refreshes every 10 seconds. Use the circuit breaker card to identify which providers are experiencing issues.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application (Electron)
|
||||
|
||||
OmniRoute is available as a native desktop application for Windows, macOS, and Linux.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# From the electron directory:
|
||||
cd electron
|
||||
npm install
|
||||
|
||||
# Development mode (connect to running Next.js dev server):
|
||||
npm run dev
|
||||
|
||||
# Production mode (uses standalone build):
|
||||
npm start
|
||||
```
|
||||
|
||||
### Building Installers
|
||||
|
||||
```bash
|
||||
cd electron
|
||||
npm run build # Current platform
|
||||
npm run build:win # Windows (.exe NSIS)
|
||||
npm run build:mac # macOS (.dmg universal)
|
||||
npm run build:linux # Linux (.AppImage)
|
||||
```
|
||||
|
||||
Output → `electron/dist-electron/`
|
||||
|
||||
### Key Features
|
||||
|
||||
| Feature | Description |
|
||||
| --------------------------- | ---------------------------------------------------- |
|
||||
| **Server Readiness** | Polls server before showing window (no blank screen) |
|
||||
| **System Tray** | Minimize to tray, change port, quit from tray menu |
|
||||
| **Port Management** | Change server port from tray (auto-restarts server) |
|
||||
| **Content Security Policy** | Restrictive CSP via session headers |
|
||||
| **Single Instance** | Only one app instance can run at a time |
|
||||
| **Offline Mode** | Bundled Next.js server works without internet |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| --------------------- | ------- | -------------------------------- |
|
||||
| `OMNIROUTE_PORT` | `20128` | Server port |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit (64–16384 MB) |
|
||||
|
||||
📖 Full documentation: [`electron/README.md`](../electron/README.md)
|
||||
@@ -0,0 +1,403 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/VM_DEPLOYMENT_GUIDE.md) · 🇪🇸 [es](../es/VM_DEPLOYMENT_GUIDE.md) · 🇫🇷 [fr](../fr/VM_DEPLOYMENT_GUIDE.md) · 🇩🇪 [de](../de/VM_DEPLOYMENT_GUIDE.md) · 🇮🇹 [it](../it/VM_DEPLOYMENT_GUIDE.md) · 🇷🇺 [ru](../ru/VM_DEPLOYMENT_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/VM_DEPLOYMENT_GUIDE.md) · 🇯🇵 [ja](../ja/VM_DEPLOYMENT_GUIDE.md) · 🇰🇷 [ko](../ko/VM_DEPLOYMENT_GUIDE.md) · 🇸🇦 [ar](../ar/VM_DEPLOYMENT_GUIDE.md) · 🇮🇳 [in](../in/VM_DEPLOYMENT_GUIDE.md) · 🇹🇭 [th](../th/VM_DEPLOYMENT_GUIDE.md) · 🇻🇳 [vi](../vi/VM_DEPLOYMENT_GUIDE.md) · 🇮🇩 [id](../id/VM_DEPLOYMENT_GUIDE.md) · 🇲🇾 [ms](../ms/VM_DEPLOYMENT_GUIDE.md) · 🇳🇱 [nl](../nl/VM_DEPLOYMENT_GUIDE.md) · 🇵🇱 [pl](../pl/VM_DEPLOYMENT_GUIDE.md) · 🇸🇪 [sv](../sv/VM_DEPLOYMENT_GUIDE.md) · 🇳🇴 [no](../no/VM_DEPLOYMENT_GUIDE.md) · 🇩🇰 [da](../da/VM_DEPLOYMENT_GUIDE.md) · 🇫🇮 [fi](../fi/VM_DEPLOYMENT_GUIDE.md) · 🇵🇹 [pt](../pt/VM_DEPLOYMENT_GUIDE.md) · 🇷🇴 [ro](../ro/VM_DEPLOYMENT_GUIDE.md) · 🇭🇺 [hu](../hu/VM_DEPLOYMENT_GUIDE.md) · 🇧🇬 [bg](../bg/VM_DEPLOYMENT_GUIDE.md) · 🇸🇰 [sk](../sk/VM_DEPLOYMENT_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/VM_DEPLOYMENT_GUIDE.md) · 🇮🇱 [he](../he/VM_DEPLOYMENT_GUIDE.md) · 🇵🇭 [phi](../phi/VM_DEPLOYMENT_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Guia de Deploy em VM com Cloudflare
|
||||
|
||||
Guia completo para instalar e configurar o OmniRoute em uma VM (VPS) com domínio gerenciado via Cloudflare.
|
||||
|
||||
---
|
||||
|
||||
## Pré-Requisitos
|
||||
|
||||
| Item | Mínimo | Recomendado |
|
||||
| ----------- | ------------------------ | ---------------- |
|
||||
| **CPU** | 1 vCPU | 2 vCPU |
|
||||
| **RAM** | 1 GB | 2 GB |
|
||||
| **Disco** | 10 GB SSD | 25 GB SSD |
|
||||
| **SO** | Ubuntu 22.04 LTS | Ubuntu 24.04 LTS |
|
||||
| **Domínio** | Registrado no Cloudflare | — |
|
||||
| **Docker** | Docker Engine 24+ | Docker 27+ |
|
||||
|
||||
**Providers testados**: Akamai (Linode), DigitalOcean, Vultr, Hetzner, AWS Lightsail.
|
||||
|
||||
---
|
||||
|
||||
## 1. Configurar a VM
|
||||
|
||||
### 1.1 Criar a instância
|
||||
|
||||
No seu provider de VPS preferido:
|
||||
|
||||
- Escolha Ubuntu 24.04 LTS
|
||||
- Selecione o plano mínimo (1 vCPU / 1 GB RAM)
|
||||
- Defina uma senha forte para root ou configure SSH key
|
||||
- Anote o **IP público** (ex: `203.0.113.10`)
|
||||
|
||||
### 1.2 Conectar via SSH
|
||||
|
||||
```bash
|
||||
ssh root@203.0.113.10
|
||||
```
|
||||
|
||||
### 1.3 Atualizar o sistema
|
||||
|
||||
```bash
|
||||
apt update && apt upgrade -y
|
||||
```
|
||||
|
||||
### 1.4 Instalar Docker
|
||||
|
||||
```bash
|
||||
# Instalar dependências
|
||||
apt install -y ca-certificates curl gnupg
|
||||
|
||||
# Adicionar repositório oficial do Docker
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
chmod a+r /etc/apt/keyrings/docker.gpg
|
||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
apt update
|
||||
apt install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
|
||||
```
|
||||
|
||||
### 1.5 Instalar nginx
|
||||
|
||||
```bash
|
||||
apt install -y nginx
|
||||
```
|
||||
|
||||
### 1.6 Configurar Firewall (UFW)
|
||||
|
||||
```bash
|
||||
ufw default deny incoming
|
||||
ufw default allow outgoing
|
||||
ufw allow 22/tcp # SSH
|
||||
ufw allow 80/tcp # HTTP (redirect)
|
||||
ufw allow 443/tcp # HTTPS
|
||||
ufw enable
|
||||
```
|
||||
|
||||
> **Dica**: Para segurança máxima, restrinja as portas 80 e 443 apenas para IPs da Cloudflare. Veja a seção [Segurança Avançada](#segurança-avançada).
|
||||
|
||||
---
|
||||
|
||||
## 2. Instalar o OmniRoute
|
||||
|
||||
### 2.1 Criar diretório de configuração
|
||||
|
||||
```bash
|
||||
mkdir -p /opt/omniroute
|
||||
```
|
||||
|
||||
### 2.2 Criar arquivo de variáveis de ambiente
|
||||
|
||||
```bash
|
||||
cat > /opt/omniroute/.env << 'EOF'
|
||||
# === Segurança ===
|
||||
JWT_SECRET=ALTERE-PARA-CHAVE-SECRETA-UNICA-64-CHARS
|
||||
INITIAL_PASSWORD=SuaSenhaSegura123!
|
||||
API_KEY_SECRET=ALTERE-PARA-OUTRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY=ALTERE-PARA-TERCEIRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY_VERSION=v1
|
||||
MACHINE_ID_SALT=ALTERE-PARA-SALT-UNICO
|
||||
|
||||
# === App ===
|
||||
PORT=20128
|
||||
NODE_ENV=production
|
||||
HOSTNAME=0.0.0.0
|
||||
DATA_DIR=/app/data
|
||||
STORAGE_DRIVER=sqlite
|
||||
ENABLE_REQUEST_LOGS=true
|
||||
AUTH_COOKIE_SECURE=false
|
||||
REQUIRE_API_KEY=false
|
||||
|
||||
# === Domain (altere para seu domínio) ===
|
||||
BASE_URL=https://llms.seudominio.com
|
||||
NEXT_PUBLIC_BASE_URL=https://llms.seudominio.com
|
||||
|
||||
# === Cloud Sync (opcional) ===
|
||||
# CLOUD_URL=https://cloud.omniroute.online
|
||||
# NEXT_PUBLIC_CLOUD_URL=https://cloud.omniroute.online
|
||||
EOF
|
||||
```
|
||||
|
||||
> ⚠️ **IMPORTANTE**: Gere chaves secretas únicas! Use `openssl rand -hex 32` para cada chave.
|
||||
|
||||
### 2.3 Iniciar o container
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### 2.4 Verificar se está rodando
|
||||
|
||||
```bash
|
||||
docker ps | grep omniroute
|
||||
docker logs omniroute --tail 20
|
||||
```
|
||||
|
||||
Deve exibir: `[DB] SQLite database ready` e `listening on port 20128`.
|
||||
|
||||
---
|
||||
|
||||
## 3. Configurar nginx (Reverse Proxy)
|
||||
|
||||
### 3.1 Gerar certificado SSL (Cloudflare Origin)
|
||||
|
||||
No painel da Cloudflare:
|
||||
|
||||
1. Vá em **SSL/TLS → Origin Server**
|
||||
2. Clique **Create Certificate**
|
||||
3. Deixe os padrões (15 anos, \*.seudominio.com)
|
||||
4. Copie o **Origin Certificate** e a **Private Key**
|
||||
|
||||
```bash
|
||||
mkdir -p /etc/nginx/ssl
|
||||
|
||||
# Colar o certificado
|
||||
nano /etc/nginx/ssl/origin.crt
|
||||
|
||||
# Colar a chave privada
|
||||
nano /etc/nginx/ssl/origin.key
|
||||
|
||||
chmod 600 /etc/nginx/ssl/origin.key
|
||||
```
|
||||
|
||||
### 3.2 Configuração do nginx
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/sites-available/omniroute << 'NGINX'
|
||||
# Default server — bloqueia acesso direto por IP
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen [::]:80 default_server;
|
||||
listen 443 ssl default_server;
|
||||
listen [::]:443 ssl default_server;
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
server_name _;
|
||||
return 444;
|
||||
}
|
||||
|
||||
# OmniRoute — HTTPS
|
||||
server {
|
||||
listen 443 ssl;
|
||||
listen [::]:443 ssl;
|
||||
server_name llms.seudominio.com; # Altere para seu domínio
|
||||
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
|
||||
client_max_body_size 100M;
|
||||
|
||||
location / {
|
||||
proxy_pass http://127.0.0.1:20128;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
# SSE (Server-Sent Events) — streaming AI responses
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_read_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
}
|
||||
}
|
||||
|
||||
# HTTP → HTTPS redirect
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
server_name llms.seudominio.com;
|
||||
return 301 https://$server_name$request_uri;
|
||||
}
|
||||
NGINX
|
||||
```
|
||||
|
||||
### 3.3 Ativar e testar
|
||||
|
||||
```bash
|
||||
# Remover config padrão
|
||||
rm -f /etc/nginx/sites-enabled/default
|
||||
|
||||
# Ativar OmniRoute
|
||||
ln -sf /etc/nginx/sites-available/omniroute /etc/nginx/sites-enabled/omniroute
|
||||
|
||||
# Testar e recarregar
|
||||
nginx -t && systemctl reload nginx
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Configurar Cloudflare DNS
|
||||
|
||||
### 4.1 Adicionar registro DNS
|
||||
|
||||
No painel da Cloudflare → DNS:
|
||||
|
||||
| Type | Name | Content | Proxy |
|
||||
| ---- | ------ | ------------------------- | ---------- |
|
||||
| A | `llms` | `203.0.113.10` (IP da VM) | ✅ Proxied |
|
||||
|
||||
### 4.2 Configurar SSL
|
||||
|
||||
Em **SSL/TLS → Overview**:
|
||||
|
||||
- Modo: **Full (Strict)**
|
||||
|
||||
Em **SSL/TLS → Edge Certificates**:
|
||||
|
||||
- Always Use HTTPS: ✅ On
|
||||
- Minimum TLS Version: TLS 1.2
|
||||
- Automatic HTTPS Rewrites: ✅ On
|
||||
|
||||
### 4.3 Testar
|
||||
|
||||
```bash
|
||||
curl -sI https://llms.seudominio.com/health
|
||||
# Deve retornar HTTP/2 200
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Operações e Manutenção
|
||||
|
||||
### Atualizar para nova versão
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
docker stop omniroute && docker rm omniroute
|
||||
docker run -d --name omniroute --restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### Ver logs
|
||||
|
||||
```bash
|
||||
docker logs -f omniroute # Stream em tempo real
|
||||
docker logs omniroute --tail 50 # Últimas 50 linhas
|
||||
```
|
||||
|
||||
### Backup manual do banco
|
||||
|
||||
```bash
|
||||
# Copiar dados do volume para o host
|
||||
docker cp omniroute:/app/data ./backup-$(date +%F)
|
||||
|
||||
# Ou comprimir todo o volume
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine tar czf /backup/omniroute-data-$(date +%F).tar.gz /data
|
||||
```
|
||||
|
||||
### Restaurar de backup
|
||||
|
||||
```bash
|
||||
docker stop omniroute
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine sh -c "rm -rf /data/* && tar xzf /backup/omniroute-data-YYYY-MM-DD.tar.gz -C /"
|
||||
docker start omniroute
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Segurança Avançada
|
||||
|
||||
### Restringir nginx para Cloudflare IPs
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/cloudflare-ips.conf << 'CF'
|
||||
# Cloudflare IPv4 ranges — atualizar periodicamente
|
||||
# https://www.cloudflare.com/ips-v4/
|
||||
set_real_ip_from 173.245.48.0/20;
|
||||
set_real_ip_from 103.21.244.0/22;
|
||||
set_real_ip_from 103.22.200.0/22;
|
||||
set_real_ip_from 103.31.4.0/22;
|
||||
set_real_ip_from 141.101.64.0/18;
|
||||
set_real_ip_from 108.162.192.0/18;
|
||||
set_real_ip_from 190.93.240.0/20;
|
||||
set_real_ip_from 188.114.96.0/20;
|
||||
set_real_ip_from 197.234.240.0/22;
|
||||
set_real_ip_from 198.41.128.0/17;
|
||||
set_real_ip_from 162.158.0.0/15;
|
||||
set_real_ip_from 104.16.0.0/13;
|
||||
set_real_ip_from 104.24.0.0/14;
|
||||
set_real_ip_from 172.64.0.0/13;
|
||||
set_real_ip_from 131.0.72.0/22;
|
||||
real_ip_header CF-Connecting-IP;
|
||||
CF
|
||||
```
|
||||
|
||||
Adicionar no `nginx.conf` dentro do bloco `http {}`:
|
||||
|
||||
```nginx
|
||||
include /etc/nginx/cloudflare-ips.conf;
|
||||
```
|
||||
|
||||
### Install fail2ban
|
||||
|
||||
```bash
|
||||
apt install -y fail2ban
|
||||
systemctl enable fail2ban
|
||||
systemctl start fail2ban
|
||||
|
||||
# Verificar status
|
||||
fail2ban-client status sshd
|
||||
```
|
||||
|
||||
### Bloquear acesso direto na porta do Docker
|
||||
|
||||
```bash
|
||||
# Impedir acesso externo direto à porta 20128
|
||||
iptables -I DOCKER-USER -p tcp --dport 20128 -j DROP
|
||||
iptables -I DOCKER-USER -i lo -p tcp --dport 20128 -j ACCEPT
|
||||
|
||||
# Persistir as regras
|
||||
apt install -y iptables-persistent
|
||||
netfilter-persistent save
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Deploy do Cloud Worker (Opcional)
|
||||
|
||||
Para acesso remoto via Cloudflare Workers (sem expor a VM diretamente):
|
||||
|
||||
```bash
|
||||
# No repositório local
|
||||
cd omnirouteCloud
|
||||
npm install
|
||||
npx wrangler login
|
||||
npx wrangler deploy
|
||||
```
|
||||
|
||||
Ver documentação completa em [omnirouteCloud/README.md](../omnirouteCloud/README.md).
|
||||
|
||||
---
|
||||
|
||||
## Resumo de Portas
|
||||
|
||||
| Porta | Serviço | Acesso |
|
||||
| ----- | ----------- | ----------------------------- |
|
||||
| 22 | SSH | Público (com fail2ban) |
|
||||
| 80 | nginx HTTP | Redirect → HTTPS |
|
||||
| 443 | nginx HTTPS | Via Cloudflare Proxy |
|
||||
| 20128 | OmniRoute | Somente localhost (via nginx) |
|
||||
@@ -0,0 +1,200 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/A2A-SERVER.md) · 🇪🇸 [es](../es/A2A-SERVER.md) · 🇫🇷 [fr](../fr/A2A-SERVER.md) · 🇩🇪 [de](../de/A2A-SERVER.md) · 🇮🇹 [it](../it/A2A-SERVER.md) · 🇷🇺 [ru](../ru/A2A-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/A2A-SERVER.md) · 🇯🇵 [ja](../ja/A2A-SERVER.md) · 🇰🇷 [ko](../ko/A2A-SERVER.md) · 🇸🇦 [ar](../ar/A2A-SERVER.md) · 🇮🇳 [in](../in/A2A-SERVER.md) · 🇹🇭 [th](../th/A2A-SERVER.md) · 🇻🇳 [vi](../vi/A2A-SERVER.md) · 🇮🇩 [id](../id/A2A-SERVER.md) · 🇲🇾 [ms](../ms/A2A-SERVER.md) · 🇳🇱 [nl](../nl/A2A-SERVER.md) · 🇵🇱 [pl](../pl/A2A-SERVER.md) · 🇸🇪 [sv](../sv/A2A-SERVER.md) · 🇳🇴 [no](../no/A2A-SERVER.md) · 🇩🇰 [da](../da/A2A-SERVER.md) · 🇫🇮 [fi](../fi/A2A-SERVER.md) · 🇵🇹 [pt](../pt/A2A-SERVER.md) · 🇷🇴 [ro](../ro/A2A-SERVER.md) · 🇭🇺 [hu](../hu/A2A-SERVER.md) · 🇧🇬 [bg](../bg/A2A-SERVER.md) · 🇸🇰 [sk](../sk/A2A-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/A2A-SERVER.md) · 🇮🇱 [he](../he/A2A-SERVER.md) · 🇵🇭 [phi](../phi/A2A-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute A2A Server Documentation
|
||||
|
||||
> Agent-to-Agent Protocol v0.3 — OmniRoute as an intelligent routing agent
|
||||
|
||||
## Agent Discovery
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/.well-known/agent.json
|
||||
```
|
||||
|
||||
Returns the Agent Card describing OmniRoute's capabilities, skills, and authentication requirements.
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
All `/a2a` requests require an API key via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer YOUR_OMNIROUTE_API_KEY
|
||||
```
|
||||
|
||||
If no API key is configured on the server, authentication is bypassed.
|
||||
|
||||
---
|
||||
|
||||
## JSON-RPC 2.0 Methods
|
||||
|
||||
### `message/send` — Synchronous Execution
|
||||
|
||||
Sends a message to a skill and waits for the complete response.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Write a hello world in Python"}],
|
||||
"metadata": {"model": "auto", "combo": "fast-coding"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"result": {
|
||||
"task": { "id": "uuid", "state": "completed" },
|
||||
"artifacts": [{ "type": "text", "content": "..." }],
|
||||
"metadata": {
|
||||
"routing_explanation": "Selected claude-sonnet via provider \"anthropic\" (latency: 1200ms, cost: $0.003)",
|
||||
"cost_envelope": { "estimated": 0.005, "actual": 0.003, "currency": "USD" },
|
||||
"resilience_trace": [
|
||||
{ "event": "primary_selected", "provider": "anthropic", "timestamp": "..." }
|
||||
],
|
||||
"policy_verdict": { "allowed": true, "reason": "within budget and quota limits" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `message/stream` — SSE Streaming
|
||||
|
||||
Same as `message/send` but returns Server-Sent Events for real-time streaming.
|
||||
|
||||
```bash
|
||||
curl -N -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/stream",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Explain quantum computing"}]
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**SSE Events:**
|
||||
|
||||
```
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"working"},"chunk":{"type":"text","content":"..."}}}
|
||||
|
||||
: heartbeat 2026-03-03T17:00:00Z
|
||||
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"completed"},"metadata":{...}}}
|
||||
```
|
||||
|
||||
### `tasks/get` — Query Task Status
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"tasks/get","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
### `tasks/cancel` — Cancel a Task
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"3","method":"tasks/cancel","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Skills
|
||||
|
||||
| Skill | Description |
|
||||
| :----------------- | :------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `smart-routing` | Routes prompts through OmniRoute's intelligent pipeline. Returns response with routing explanation, cost, and resilience trace. |
|
||||
| `quota-management` | Answers natural-language queries about provider quotas, suggests free combos, and provides quota rankings. |
|
||||
|
||||
---
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
```
|
||||
submitted → working → completed
|
||||
→ failed
|
||||
→ cancelled
|
||||
```
|
||||
|
||||
- Tasks expire after 5 minutes (configurable)
|
||||
- Terminal states: `completed`, `failed`, `cancelled`
|
||||
- Event log tracks every state transition
|
||||
|
||||
---
|
||||
|
||||
## Error Codes
|
||||
|
||||
| Code | Meaning |
|
||||
| :----- | :----------------------------- |
|
||||
| -32700 | Parse error (invalid JSON) |
|
||||
| -32600 | Invalid request / Unauthorized |
|
||||
| -32601 | Method or skill not found |
|
||||
| -32602 | Invalid params |
|
||||
| -32603 | Internal error |
|
||||
|
||||
---
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### Python (requests)
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
resp = requests.post("http://localhost:20128/a2a", json={
|
||||
"jsonrpc": "2.0", "id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}
|
||||
}, headers={"Authorization": "Bearer YOUR_KEY"})
|
||||
|
||||
result = resp.json()["result"]
|
||||
print(result["artifacts"][0]["content"])
|
||||
print(result["metadata"]["routing_explanation"])
|
||||
```
|
||||
|
||||
### TypeScript (fetch)
|
||||
|
||||
```typescript
|
||||
const resp = await fetch("http://localhost:20128/a2a", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: "Bearer YOUR_KEY",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: "1",
|
||||
method: "message/send",
|
||||
params: {
|
||||
skill: "smart-routing",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { result } = await resp.json();
|
||||
console.log(result.metadata.routing_explanation);
|
||||
```
|
||||
@@ -0,0 +1,455 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/API_REFERENCE.md) · 🇪🇸 [es](../es/API_REFERENCE.md) · 🇫🇷 [fr](../fr/API_REFERENCE.md) · 🇩🇪 [de](../de/API_REFERENCE.md) · 🇮🇹 [it](../it/API_REFERENCE.md) · 🇷🇺 [ru](../ru/API_REFERENCE.md) · 🇨🇳 [zh-CN](../zh-CN/API_REFERENCE.md) · 🇯🇵 [ja](../ja/API_REFERENCE.md) · 🇰🇷 [ko](../ko/API_REFERENCE.md) · 🇸🇦 [ar](../ar/API_REFERENCE.md) · 🇮🇳 [in](../in/API_REFERENCE.md) · 🇹🇭 [th](../th/API_REFERENCE.md) · 🇻🇳 [vi](../vi/API_REFERENCE.md) · 🇮🇩 [id](../id/API_REFERENCE.md) · 🇲🇾 [ms](../ms/API_REFERENCE.md) · 🇳🇱 [nl](../nl/API_REFERENCE.md) · 🇵🇱 [pl](../pl/API_REFERENCE.md) · 🇸🇪 [sv](../sv/API_REFERENCE.md) · 🇳🇴 [no](../no/API_REFERENCE.md) · 🇩🇰 [da](../da/API_REFERENCE.md) · 🇫🇮 [fi](../fi/API_REFERENCE.md) · 🇵🇹 [pt](../pt/API_REFERENCE.md) · 🇷🇴 [ro](../ro/API_REFERENCE.md) · 🇭🇺 [hu](../hu/API_REFERENCE.md) · 🇧🇬 [bg](../bg/API_REFERENCE.md) · 🇸🇰 [sk](../sk/API_REFERENCE.md) · 🇺🇦 [uk-UA](../uk-UA/API_REFERENCE.md) · 🇮🇱 [he](../he/API_REFERENCE.md) · 🇵🇭 [phi](../phi/API_REFERENCE.md)
|
||||
|
||||
---
|
||||
|
||||
# API Reference
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](API_REFERENCE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/API_REFERENCE.md) | 🇪🇸 [Español](i18n/es/API_REFERENCE.md) | 🇫🇷 [Français](i18n/fr/API_REFERENCE.md) | 🇮🇹 [Italiano](i18n/it/API_REFERENCE.md) | 🇷🇺 [Русский](i18n/ru/API_REFERENCE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/API_REFERENCE.md) | 🇩🇪 [Deutsch](i18n/de/API_REFERENCE.md) | 🇮🇳 [हिन्दी](i18n/in/API_REFERENCE.md) | 🇹🇭 [ไทย](i18n/th/API_REFERENCE.md) | 🇺🇦 [Українська](i18n/uk-UA/API_REFERENCE.md) | 🇸🇦 [العربية](i18n/ar/API_REFERENCE.md) | 🇯🇵 [日本語](i18n/ja/API_REFERENCE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/API_REFERENCE.md) | 🇧🇬 [Български](i18n/bg/API_REFERENCE.md) | 🇩🇰 [Dansk](i18n/da/API_REFERENCE.md) | 🇫🇮 [Suomi](i18n/fi/API_REFERENCE.md) | 🇮🇱 [עברית](i18n/he/API_REFERENCE.md) | 🇭🇺 [Magyar](i18n/hu/API_REFERENCE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/API_REFERENCE.md) | 🇰🇷 [한국어](i18n/ko/API_REFERENCE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/API_REFERENCE.md) | 🇳🇱 [Nederlands](i18n/nl/API_REFERENCE.md) | 🇳🇴 [Norsk](i18n/no/API_REFERENCE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/API_REFERENCE.md) | 🇷🇴 [Română](i18n/ro/API_REFERENCE.md) | 🇵🇱 [Polski](i18n/pl/API_REFERENCE.md) | 🇸🇰 [Slovenčina](i18n/sk/API_REFERENCE.md) | 🇸🇪 [Svenska](i18n/sv/API_REFERENCE.md) | 🇵🇭 [Filipino](i18n/phi/API_REFERENCE.md)
|
||||
|
||||
Complete reference for all OmniRoute API endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Chat Completions](#chat-completions)
|
||||
- [Embeddings](#embeddings)
|
||||
- [Image Generation](#image-generation)
|
||||
- [List Models](#list-models)
|
||||
- [Compatibility Endpoints](#compatibility-endpoints)
|
||||
- [Semantic Cache](#semantic-cache)
|
||||
- [Dashboard & Management](#dashboard--management)
|
||||
- [Request Processing](#request-processing)
|
||||
- [Authentication](#authentication)
|
||||
|
||||
---
|
||||
|
||||
## Chat Completions
|
||||
|
||||
```bash
|
||||
POST /v1/chat/completions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "cc/claude-opus-4-6",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Write a function to..."}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
```
|
||||
|
||||
### Custom Headers
|
||||
|
||||
| Header | Direction | Description |
|
||||
| ------------------------ | --------- | --------------------------------- |
|
||||
| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache |
|
||||
| `X-OmniRoute-Progress` | Request | Set to `true` for progress events |
|
||||
| `Idempotency-Key` | Request | Dedup key (5s window) |
|
||||
| `X-Request-Id` | Request | Alternative dedup key |
|
||||
| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) |
|
||||
| `X-OmniRoute-Idempotent` | Response | `true` if deduplicated |
|
||||
| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on |
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
```bash
|
||||
POST /v1/embeddings
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "nebius/Qwen/Qwen3-Embedding-8B",
|
||||
"input": "The food was delicious"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: Nebius, OpenAI, Mistral, Together AI, Fireworks, NVIDIA.
|
||||
|
||||
```bash
|
||||
# List all embedding models
|
||||
GET /v1/embeddings
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Image Generation
|
||||
|
||||
```bash
|
||||
POST /v1/images/generations
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "openai/dall-e-3",
|
||||
"prompt": "A beautiful sunset over mountains",
|
||||
"size": "1024x1024"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: OpenAI (DALL-E), xAI (Grok Image), Together AI (FLUX), Fireworks AI.
|
||||
|
||||
```bash
|
||||
# List all image models
|
||||
GET /v1/images/generations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## List Models
|
||||
|
||||
```bash
|
||||
GET /v1/models
|
||||
Authorization: Bearer your-api-key
|
||||
|
||||
→ Returns all chat, embedding, and image models + combos in OpenAI format
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Compatibility Endpoints
|
||||
|
||||
| Method | Path | Format |
|
||||
| ------ | --------------------------- | ---------------------- |
|
||||
| POST | `/v1/chat/completions` | OpenAI |
|
||||
| POST | `/v1/messages` | Anthropic |
|
||||
| POST | `/v1/responses` | OpenAI Responses |
|
||||
| POST | `/v1/embeddings` | OpenAI |
|
||||
| POST | `/v1/images/generations` | OpenAI |
|
||||
| GET | `/v1/models` | OpenAI |
|
||||
| POST | `/v1/messages/count_tokens` | Anthropic |
|
||||
| GET | `/v1beta/models` | Gemini |
|
||||
| POST | `/v1beta/models/{...path}` | Gemini generateContent |
|
||||
| POST | `/v1/api/chat` | Ollama |
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
```bash
|
||||
POST /v1/providers/{provider}/chat/completions
|
||||
POST /v1/providers/{provider}/embeddings
|
||||
POST /v1/providers/{provider}/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
---
|
||||
|
||||
## Semantic Cache
|
||||
|
||||
```bash
|
||||
# Get cache stats
|
||||
GET /api/cache
|
||||
|
||||
# Clear all caches
|
||||
DELETE /api/cache
|
||||
```
|
||||
|
||||
Response example:
|
||||
|
||||
```json
|
||||
{
|
||||
"semanticCache": {
|
||||
"memorySize": 42,
|
||||
"memoryMaxSize": 500,
|
||||
"dbSize": 128,
|
||||
"hitRate": 0.65
|
||||
},
|
||||
"idempotency": {
|
||||
"activeKeys": 3,
|
||||
"windowMs": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dashboard & Management
|
||||
|
||||
### Authentication
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------------- | ------- | --------------------- |
|
||||
| `/api/auth/login` | POST | Login |
|
||||
| `/api/auth/logout` | POST | Logout |
|
||||
| `/api/settings/require-login` | GET/PUT | Toggle login required |
|
||||
|
||||
### Provider Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------- | --------------- | ------------------------ |
|
||||
| `/api/providers` | GET/POST | List / create providers |
|
||||
| `/api/providers/[id]` | GET/PUT/DELETE | Manage a provider |
|
||||
| `/api/providers/[id]/test` | POST | Test provider connection |
|
||||
| `/api/providers/[id]/models` | GET | List provider models |
|
||||
| `/api/providers/validate` | POST | Validate provider config |
|
||||
| `/api/provider-nodes*` | Various | Provider node management |
|
||||
| `/api/provider-models` | GET/POST/DELETE | Custom models |
|
||||
|
||||
### OAuth Flows
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------------- | ------- | ----------------------- |
|
||||
| `/api/oauth/[provider]/[action]` | Various | Provider-specific OAuth |
|
||||
|
||||
### Routing & Config
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------- | -------- | ----------------------------- |
|
||||
| `/api/models/alias` | GET/POST | Model aliases |
|
||||
| `/api/models/catalog` | GET | All models by provider + type |
|
||||
| `/api/combos*` | Various | Combo management |
|
||||
| `/api/keys*` | Various | API key management |
|
||||
| `/api/pricing` | GET | Model pricing |
|
||||
|
||||
### Usage & Analytics
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | -------------------- |
|
||||
| `/api/usage/history` | GET | Usage history |
|
||||
| `/api/usage/logs` | GET | Usage logs |
|
||||
| `/api/usage/request-logs` | GET | Request-level logs |
|
||||
| `/api/usage/[connectionId]` | GET | Per-connection usage |
|
||||
|
||||
### Settings
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------------- | ------- | ---------------------- |
|
||||
| `/api/settings` | GET/PUT | General settings |
|
||||
| `/api/settings/proxy` | GET/PUT | Network proxy config |
|
||||
| `/api/settings/proxy/test` | POST | Test proxy connection |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt |
|
||||
|
||||
### Monitoring
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------ | ---------- | ----------------------- |
|
||||
| `/api/sessions` | GET | Active session tracking |
|
||||
| `/api/rate-limits` | GET | Per-account rate limits |
|
||||
| `/api/monitoring/health` | GET | Health check |
|
||||
| `/api/cache` | GET/DELETE | Cache stats / clear |
|
||||
|
||||
### Backup & Export/Import
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | --------------------------------------- |
|
||||
| `/api/db-backups` | GET | List available backups |
|
||||
| `/api/db-backups` | PUT | Create a manual backup |
|
||||
| `/api/db-backups` | POST | Restore from a specific backup |
|
||||
| `/api/db-backups/export` | GET | Download database as .sqlite file |
|
||||
| `/api/db-backups/import` | POST | Upload .sqlite file to replace database |
|
||||
| `/api/db-backups/exportAll` | GET | Download full backup as .tar.gz archive |
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------- | ------- | --------------------- |
|
||||
| `/api/sync/cloud` | Various | Cloud sync operations |
|
||||
| `/api/sync/initialize` | POST | Initialize sync |
|
||||
| `/api/cloud/*` | Various | Cloud management |
|
||||
|
||||
### CLI Tools
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------------- | ------ | ------------------- |
|
||||
| `/api/cli-tools/claude-settings` | GET | Claude CLI status |
|
||||
| `/api/cli-tools/codex-settings` | GET | Codex CLI status |
|
||||
| `/api/cli-tools/droid-settings` | GET | Droid CLI status |
|
||||
| `/api/cli-tools/openclaw-settings` | GET | OpenClaw CLI status |
|
||||
| `/api/cli-tools/runtime/[toolId]` | GET | Generic CLI runtime |
|
||||
|
||||
CLI responses include: `installed`, `runnable`, `command`, `commandPath`, `runtimeMode`, `reason`.
|
||||
|
||||
### ACP Agents
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------- | ------ | -------------------------------------------------------- |
|
||||
| `/api/acp/agents` | GET | List all detected agents (built-in + custom) with status |
|
||||
| `/api/acp/agents` | POST | Add custom agent or refresh detection cache |
|
||||
| `/api/acp/agents` | DELETE | Remove a custom agent by `id` query param |
|
||||
|
||||
GET response includes `agents[]` (id, name, binary, version, installed, protocol, isCustom) and `summary` (total, installed, notFound, builtIn, custom).
|
||||
|
||||
### Resilience & Rate Limits
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------- | ------- | ------------------------------- |
|
||||
| `/api/resilience` | GET/PUT | Get/update resilience profiles |
|
||||
| `/api/resilience/reset` | POST | Reset circuit breakers |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
| `/api/rate-limit` | GET | Global rate limit configuration |
|
||||
|
||||
### Evals
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------ | -------- | --------------------------------- |
|
||||
| `/api/evals` | GET/POST | List eval suites / run evaluation |
|
||||
|
||||
### Policies
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | --------------- | ----------------------- |
|
||||
| `/api/policies` | GET/POST/DELETE | Manage routing policies |
|
||||
|
||||
### Compliance
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | ----------------------------- |
|
||||
| `/api/compliance/audit-log` | GET | Compliance audit log (last N) |
|
||||
|
||||
### v1beta (Gemini-Compatible)
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------- | ------ | --------------------------------- |
|
||||
| `/v1beta/models` | GET | List models in Gemini format |
|
||||
| `/v1beta/models/{...path}` | POST | Gemini `generateContent` endpoint |
|
||||
|
||||
These endpoints mirror Gemini's API format for clients that expect native Gemini SDK compatibility.
|
||||
|
||||
### Internal / System APIs
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | ------ | ---------------------------------------------------- |
|
||||
| `/api/init` | GET | Application initialization check (used on first run) |
|
||||
| `/api/tags` | GET | Ollama-compatible model tags (for Ollama clients) |
|
||||
| `/api/restart` | POST | Trigger graceful server restart |
|
||||
| `/api/shutdown` | POST | Trigger graceful server shutdown |
|
||||
|
||||
> **Note:** These endpoints are used internally by the system or for Ollama client compatibility. They are not typically called by end users.
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
```
|
||||
|
||||
Transcribe audio files using Deepgram or AssemblyAI.
|
||||
|
||||
**Request:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@recording.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "Hello, this is the transcribed audio content.",
|
||||
"task": "transcribe",
|
||||
"language": "en",
|
||||
"duration": 12.5
|
||||
}
|
||||
```
|
||||
|
||||
**Supported providers:** `deepgram/nova-3`, `assemblyai/best`.
|
||||
|
||||
**Supported formats:** `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
## Ollama Compatibility
|
||||
|
||||
For clients that use Ollama's API format:
|
||||
|
||||
```bash
|
||||
# Chat endpoint (Ollama format)
|
||||
POST /v1/api/chat
|
||||
|
||||
# Model listing (Ollama format)
|
||||
GET /api/tags
|
||||
```
|
||||
|
||||
Requests are automatically translated between Ollama and internal formats.
|
||||
|
||||
---
|
||||
|
||||
## Telemetry
|
||||
|
||||
```bash
|
||||
# Get latency telemetry summary (p50/p95/p99 per provider)
|
||||
GET /api/telemetry/summary
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"claudeCode": { "p50": 245, "p95": 890, "p99": 1200, "count": 150 },
|
||||
"github": { "p50": 180, "p95": 620, "p99": 950, "count": 320 }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Budget
|
||||
|
||||
```bash
|
||||
# Get budget status for all API keys
|
||||
GET /api/usage/budget
|
||||
|
||||
# Set or update a budget
|
||||
POST /api/usage/budget
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"keyId": "key-123",
|
||||
"limit": 50.00,
|
||||
"period": "monthly"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Availability
|
||||
|
||||
```bash
|
||||
# Get real-time model availability across all providers
|
||||
GET /api/models/availability
|
||||
|
||||
# Check availability for a specific model
|
||||
POST /api/models/availability
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-5-20250929"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Request Processing
|
||||
|
||||
1. Client sends request to `/v1/*`
|
||||
2. Route handler calls `handleChat`, `handleEmbedding`, `handleAudioTranscription`, or `handleImageGeneration`
|
||||
3. Model is resolved (direct provider/model or alias/combo)
|
||||
4. Credentials selected from local DB with account availability filtering
|
||||
5. For chat: `handleChatCore` — format detection, translation, cache check, idempotency check
|
||||
6. Provider executor sends upstream request
|
||||
7. Response translated back to client format (chat) or returned as-is (embeddings/images/audio)
|
||||
8. Usage/logging recorded
|
||||
9. Fallback applies on errors according to combo rules
|
||||
|
||||
Full architecture reference: [`ARCHITECTURE.md`](ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
- Dashboard routes (`/dashboard/*`) use `auth_token` cookie
|
||||
- Login uses saved password hash; fallback to `INITIAL_PASSWORD`
|
||||
- `requireLogin` toggleable via `/api/settings/require-login`
|
||||
- `/v1/*` routes optionally require Bearer API key when `REQUIRE_API_KEY=true`
|
||||
@@ -0,0 +1,787 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/ARCHITECTURE.md) · 🇪🇸 [es](../es/ARCHITECTURE.md) · 🇫🇷 [fr](../fr/ARCHITECTURE.md) · 🇩🇪 [de](../de/ARCHITECTURE.md) · 🇮🇹 [it](../it/ARCHITECTURE.md) · 🇷🇺 [ru](../ru/ARCHITECTURE.md) · 🇨🇳 [zh-CN](../zh-CN/ARCHITECTURE.md) · 🇯🇵 [ja](../ja/ARCHITECTURE.md) · 🇰🇷 [ko](../ko/ARCHITECTURE.md) · 🇸🇦 [ar](../ar/ARCHITECTURE.md) · 🇮🇳 [in](../in/ARCHITECTURE.md) · 🇹🇭 [th](../th/ARCHITECTURE.md) · 🇻🇳 [vi](../vi/ARCHITECTURE.md) · 🇮🇩 [id](../id/ARCHITECTURE.md) · 🇲🇾 [ms](../ms/ARCHITECTURE.md) · 🇳🇱 [nl](../nl/ARCHITECTURE.md) · 🇵🇱 [pl](../pl/ARCHITECTURE.md) · 🇸🇪 [sv](../sv/ARCHITECTURE.md) · 🇳🇴 [no](../no/ARCHITECTURE.md) · 🇩🇰 [da](../da/ARCHITECTURE.md) · 🇫🇮 [fi](../fi/ARCHITECTURE.md) · 🇵🇹 [pt](../pt/ARCHITECTURE.md) · 🇷🇴 [ro](../ro/ARCHITECTURE.md) · 🇭🇺 [hu](../hu/ARCHITECTURE.md) · 🇧🇬 [bg](../bg/ARCHITECTURE.md) · 🇸🇰 [sk](../sk/ARCHITECTURE.md) · 🇺🇦 [uk-UA](../uk-UA/ARCHITECTURE.md) · 🇮🇱 [he](../he/ARCHITECTURE.md) · 🇵🇭 [phi](../phi/ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Architecture
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](ARCHITECTURE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/ARCHITECTURE.md) | 🇪🇸 [Español](i18n/es/ARCHITECTURE.md) | 🇫🇷 [Français](i18n/fr/ARCHITECTURE.md) | 🇮🇹 [Italiano](i18n/it/ARCHITECTURE.md) | 🇷🇺 [Русский](i18n/ru/ARCHITECTURE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/ARCHITECTURE.md) | 🇩🇪 [Deutsch](i18n/de/ARCHITECTURE.md) | 🇮🇳 [हिन्दी](i18n/in/ARCHITECTURE.md) | 🇹🇭 [ไทย](i18n/th/ARCHITECTURE.md) | 🇺🇦 [Українська](i18n/uk-UA/ARCHITECTURE.md) | 🇸🇦 [العربية](i18n/ar/ARCHITECTURE.md) | 🇯🇵 [日本語](i18n/ja/ARCHITECTURE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/ARCHITECTURE.md) | 🇧🇬 [Български](i18n/bg/ARCHITECTURE.md) | 🇩🇰 [Dansk](i18n/da/ARCHITECTURE.md) | 🇫🇮 [Suomi](i18n/fi/ARCHITECTURE.md) | 🇮🇱 [עברית](i18n/he/ARCHITECTURE.md) | 🇭🇺 [Magyar](i18n/hu/ARCHITECTURE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/ARCHITECTURE.md) | 🇰🇷 [한국어](i18n/ko/ARCHITECTURE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/ARCHITECTURE.md) | 🇳🇱 [Nederlands](i18n/nl/ARCHITECTURE.md) | 🇳🇴 [Norsk](i18n/no/ARCHITECTURE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/ARCHITECTURE.md) | 🇷🇴 [Română](i18n/ro/ARCHITECTURE.md) | 🇵🇱 [Polski](i18n/pl/ARCHITECTURE.md) | 🇸🇰 [Slovenčina](i18n/sk/ARCHITECTURE.md) | 🇸🇪 [Svenska](i18n/sv/ARCHITECTURE.md) | 🇵🇭 [Filipino](i18n/phi/ARCHITECTURE.md)
|
||||
|
||||
_Last updated: 2026-03-04_
|
||||
|
||||
## Executive Summary
|
||||
|
||||
OmniRoute is a local AI routing gateway and dashboard built on Next.js.
|
||||
It provides a single OpenAI-compatible endpoint (`/v1/*`) and routes traffic across multiple upstream providers with translation, fallback, token refresh, and usage tracking.
|
||||
|
||||
Core capabilities:
|
||||
|
||||
- OpenAI-compatible API surface for CLI/tools (28 providers)
|
||||
- Request/response translation across provider formats
|
||||
- Model combo fallback (multi-model sequence)
|
||||
- Account-level fallback (multi-account per provider)
|
||||
- OAuth + API-key provider connection management
|
||||
- Embedding generation via `/v1/embeddings` (6 providers, 9 models)
|
||||
- Image generation via `/v1/images/generations` (4 providers, 9 models)
|
||||
- Think tag parsing (`<think>...</think>`) for reasoning models
|
||||
- Response sanitization for strict OpenAI SDK compatibility
|
||||
- Role normalization (developer→system, system→user) for cross-provider compatibility
|
||||
- Structured output conversion (json_schema → Gemini responseSchema)
|
||||
- Local persistence for providers, keys, aliases, combos, settings, pricing
|
||||
- Usage/cost tracking and request logging
|
||||
- Optional cloud sync for multi-device/state sync
|
||||
- IP allowlist/blocklist for API access control
|
||||
- Thinking budget management (passthrough/auto/custom/adaptive)
|
||||
- Global system prompt injection
|
||||
- Session tracking and fingerprinting
|
||||
- Per-account enhanced rate limiting with provider-specific profiles
|
||||
- Circuit breaker pattern for provider resilience
|
||||
- Anti-thundering herd protection with mutex locking
|
||||
- Signature-based request deduplication cache
|
||||
- Domain layer: model availability, cost rules, fallback policy, lockout policy
|
||||
- Domain state persistence (SQLite write-through cache for fallbacks, budgets, lockouts, circuit breakers)
|
||||
- Policy engine for centralized request evaluation (lockout → budget → fallback)
|
||||
- Request telemetry with p50/p95/p99 latency aggregation
|
||||
- Correlation ID (X-Request-Id) for end-to-end tracing
|
||||
- Compliance audit logging with opt-out per API key
|
||||
- Eval framework for LLM quality assurance
|
||||
- Resilience UI dashboard with real-time circuit breaker status
|
||||
- Modular OAuth providers (12 individual modules under `src/lib/oauth/providers/`)
|
||||
|
||||
Primary runtime model:
|
||||
|
||||
- Next.js app routes under `src/app/api/*` implement both dashboard APIs and compatibility APIs
|
||||
- A shared SSE/routing core in `src/sse/*` + `open-sse/*` handles provider execution, translation, streaming, fallback, and usage
|
||||
|
||||
## Scope and Boundaries
|
||||
|
||||
### In Scope
|
||||
|
||||
- Local gateway runtime
|
||||
- Dashboard management APIs
|
||||
- Provider authentication and token refresh
|
||||
- Request translation and SSE streaming
|
||||
- Local state + usage persistence
|
||||
- Optional cloud sync orchestration
|
||||
|
||||
### Out of Scope
|
||||
|
||||
- Cloud service implementation behind `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Provider SLA/control plane outside local process
|
||||
- External CLI binaries themselves (Claude CLI, Codex CLI, etc.)
|
||||
|
||||
## High-Level System Context
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Clients[Developer Clients]
|
||||
C1[Claude Code]
|
||||
C2[Codex CLI]
|
||||
C3[OpenClaw / Droid / Cline / Continue / Roo]
|
||||
C4[Custom OpenAI-compatible clients]
|
||||
BROWSER[Browser Dashboard]
|
||||
end
|
||||
|
||||
subgraph Router[OmniRoute Local Process]
|
||||
API[V1 Compatibility API\n/v1/*]
|
||||
DASH[Dashboard + Management API\n/api/*]
|
||||
CORE[SSE + Translation Core\nopen-sse + src/sse]
|
||||
DB[(storage.sqlite)]
|
||||
UDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph Upstreams[Upstream Providers]
|
||||
P1[OAuth Providers\nClaude/Codex/Gemini/Qwen/iFlow/GitHub/Kiro/Cursor/Antigravity]
|
||||
P2[API Key Providers\nOpenAI/Anthropic/OpenRouter/GLM/Kimi/MiniMax\nDeepSeek/Groq/xAI/Mistral/Perplexity\nTogether/Fireworks/Cerebras/Cohere/NVIDIA]
|
||||
P3[Compatible Nodes\nOpenAI-compatible / Anthropic-compatible]
|
||||
end
|
||||
|
||||
subgraph Cloud[Optional Cloud Sync]
|
||||
CLOUD[Cloud Sync Endpoint\nNEXT_PUBLIC_CLOUD_URL]
|
||||
end
|
||||
|
||||
C1 --> API
|
||||
C2 --> API
|
||||
C3 --> API
|
||||
C4 --> API
|
||||
BROWSER --> DASH
|
||||
|
||||
API --> CORE
|
||||
DASH --> DB
|
||||
CORE --> DB
|
||||
CORE --> UDB
|
||||
|
||||
CORE --> P1
|
||||
CORE --> P2
|
||||
CORE --> P3
|
||||
|
||||
DASH --> CLOUD
|
||||
```
|
||||
|
||||
## Core Runtime Components
|
||||
|
||||
## 1) API and Routing Layer (Next.js App Routes)
|
||||
|
||||
Main directories:
|
||||
|
||||
- `src/app/api/v1/*` and `src/app/api/v1beta/*` for compatibility APIs
|
||||
- `src/app/api/*` for management/configuration APIs
|
||||
- Next rewrites in `next.config.mjs` map `/v1/*` to `/api/v1/*`
|
||||
|
||||
Important compatibility routes:
|
||||
|
||||
- `src/app/api/v1/chat/completions/route.ts`
|
||||
- `src/app/api/v1/messages/route.ts`
|
||||
- `src/app/api/v1/responses/route.ts`
|
||||
- `src/app/api/v1/models/route.ts` — includes custom models with `custom: true`
|
||||
- `src/app/api/v1/embeddings/route.ts` — embedding generation (6 providers)
|
||||
- `src/app/api/v1/images/generations/route.ts` — image generation (4+ providers incl. Antigravity/Nebius)
|
||||
- `src/app/api/v1/messages/count_tokens/route.ts`
|
||||
- `src/app/api/v1/providers/[provider]/chat/completions/route.ts` — dedicated per-provider chat
|
||||
- `src/app/api/v1/providers/[provider]/embeddings/route.ts` — dedicated per-provider embeddings
|
||||
- `src/app/api/v1/providers/[provider]/images/generations/route.ts` — dedicated per-provider images
|
||||
- `src/app/api/v1beta/models/route.ts`
|
||||
- `src/app/api/v1beta/models/[...path]/route.ts`
|
||||
|
||||
Management domains:
|
||||
|
||||
- Auth/settings: `src/app/api/auth/*`, `src/app/api/settings/*`
|
||||
- Providers/connections: `src/app/api/providers*`
|
||||
- Provider nodes: `src/app/api/provider-nodes*`
|
||||
- Custom models: `src/app/api/provider-models` (GET/POST/DELETE)
|
||||
- Model catalog: `src/app/api/models/route.ts` (GET)
|
||||
- Proxy config: `src/app/api/settings/proxy` (GET/PUT/DELETE) + `src/app/api/settings/proxy/test` (POST)
|
||||
- OAuth: `src/app/api/oauth/*`
|
||||
- Keys/aliases/combos/pricing: `src/app/api/keys*`, `src/app/api/models/alias`, `src/app/api/combos*`, `src/app/api/pricing`
|
||||
- Usage: `src/app/api/usage/*`
|
||||
- Sync/cloud: `src/app/api/sync/*`, `src/app/api/cloud/*`
|
||||
- CLI tooling helpers: `src/app/api/cli-tools/*`
|
||||
- IP filter: `src/app/api/settings/ip-filter` (GET/PUT)
|
||||
- Thinking budget: `src/app/api/settings/thinking-budget` (GET/PUT)
|
||||
- System prompt: `src/app/api/settings/system-prompt` (GET/PUT)
|
||||
- Sessions: `src/app/api/sessions` (GET)
|
||||
- Rate limits: `src/app/api/rate-limits` (GET)
|
||||
- Resilience: `src/app/api/resilience` (GET/PATCH) — provider profiles, circuit breaker, rate limit state
|
||||
- Resilience reset: `src/app/api/resilience/reset` (POST) — reset breakers + cooldowns
|
||||
- Cache stats: `src/app/api/cache/stats` (GET/DELETE)
|
||||
- Model availability: `src/app/api/models/availability` (GET/POST)
|
||||
- Telemetry: `src/app/api/telemetry/summary` (GET)
|
||||
- Budget: `src/app/api/usage/budget` (GET/POST)
|
||||
- Fallback chains: `src/app/api/fallback/chains` (GET/POST/DELETE)
|
||||
- Compliance audit: `src/app/api/compliance/audit-log` (GET)
|
||||
- Evals: `src/app/api/evals` (GET/POST), `src/app/api/evals/[suiteId]` (GET)
|
||||
- Policies: `src/app/api/policies` (GET/POST)
|
||||
|
||||
## 2) SSE + Translation Core
|
||||
|
||||
Main flow modules:
|
||||
|
||||
- Entry: `src/sse/handlers/chat.ts`
|
||||
- Core orchestration: `open-sse/handlers/chatCore.ts`
|
||||
- Provider execution adapters: `open-sse/executors/*`
|
||||
- Format detection/provider config: `open-sse/services/provider.ts`
|
||||
- Model parse/resolve: `src/sse/services/model.ts`, `open-sse/services/model.ts`
|
||||
- Account fallback logic: `open-sse/services/accountFallback.ts`
|
||||
- Translation registry: `open-sse/translator/index.ts`
|
||||
- Stream transformations: `open-sse/utils/stream.ts`, `open-sse/utils/streamHandler.ts`
|
||||
- Usage extraction/normalization: `open-sse/utils/usageTracking.ts`
|
||||
- Think tag parser: `open-sse/utils/thinkTagParser.ts`
|
||||
- Embedding handler: `open-sse/handlers/embeddings.ts`
|
||||
- Embedding provider registry: `open-sse/config/embeddingRegistry.ts`
|
||||
- Image generation handler: `open-sse/handlers/imageGeneration.ts`
|
||||
- Image provider registry: `open-sse/config/imageRegistry.ts`
|
||||
- Response sanitization: `open-sse/handlers/responseSanitizer.ts`
|
||||
- Role normalization: `open-sse/services/roleNormalizer.ts`
|
||||
|
||||
Services (business logic):
|
||||
|
||||
- Account selection/scoring: `open-sse/services/accountSelector.ts`
|
||||
- Context lifecycle management: `open-sse/services/contextManager.ts`
|
||||
- IP filter enforcement: `open-sse/services/ipFilter.ts`
|
||||
- Session tracking: `open-sse/services/sessionManager.ts`
|
||||
- Request deduplication: `open-sse/services/signatureCache.ts`
|
||||
- System prompt injection: `open-sse/services/systemPrompt.ts`
|
||||
- Thinking budget management: `open-sse/services/thinkingBudget.ts`
|
||||
- Wildcard model routing: `open-sse/services/wildcardRouter.ts`
|
||||
- Rate limit management: `open-sse/services/rateLimitManager.ts`
|
||||
- Circuit breaker: `open-sse/services/circuitBreaker.ts`
|
||||
|
||||
Domain layer modules:
|
||||
|
||||
- Model availability: `src/lib/domain/modelAvailability.ts`
|
||||
- Cost rules/budgets: `src/lib/domain/costRules.ts`
|
||||
- Fallback policy: `src/lib/domain/fallbackPolicy.ts`
|
||||
- Combo resolver: `src/lib/domain/comboResolver.ts`
|
||||
- Lockout policy: `src/lib/domain/lockoutPolicy.ts`
|
||||
- Policy engine: `src/domain/policyEngine.ts` — centralized lockout → budget → fallback evaluation
|
||||
- Error codes catalog: `src/lib/domain/errorCodes.ts`
|
||||
- Request ID: `src/lib/domain/requestId.ts`
|
||||
- Fetch timeout: `src/lib/domain/fetchTimeout.ts`
|
||||
- Request telemetry: `src/lib/domain/requestTelemetry.ts`
|
||||
- Compliance/audit: `src/lib/domain/compliance/index.ts`
|
||||
- Eval runner: `src/lib/domain/evalRunner.ts`
|
||||
- Domain state persistence: `src/lib/db/domainState.ts` — SQLite CRUD for fallback chains, budgets, cost history, lockout state, circuit breakers
|
||||
|
||||
OAuth provider modules (12 individual files under `src/lib/oauth/providers/`):
|
||||
|
||||
- Registry index: `src/lib/oauth/providers/index.ts`
|
||||
- Individual providers: `claude.ts`, `codex.ts`, `gemini.ts`, `antigravity.ts`, `iflow.ts`, `qwen.ts`, `kimi-coding.ts`, `github.ts`, `kiro.ts`, `cursor.ts`, `kilocode.ts`, `cline.ts`
|
||||
- Thin wrapper: `src/lib/oauth/providers.ts` — re-exports from individual modules
|
||||
|
||||
## 3) Persistence Layer
|
||||
|
||||
Primary state DB (SQLite):
|
||||
|
||||
- Core infra: `src/lib/db/core.ts` (better-sqlite3, migrations, WAL)
|
||||
- Re-export facade: `src/lib/localDb.ts` (thin compatibility layer for callers)
|
||||
- file: `${DATA_DIR}/storage.sqlite` (or `$XDG_CONFIG_HOME/omniroute/storage.sqlite` when set, else `~/.omniroute/storage.sqlite`)
|
||||
- entities (tables + KV namespaces): providerConnections, providerNodes, modelAliases, combos, apiKeys, settings, pricing, **customModels**, **proxyConfig**, **ipFilter**, **thinkingBudget**, **systemPrompt**
|
||||
|
||||
Usage persistence:
|
||||
|
||||
- facade: `src/lib/usageDb.ts` (decomposed modules in `src/lib/usage/*`)
|
||||
- SQLite tables in `storage.sqlite`: `usage_history`, `call_logs`, `proxy_logs`
|
||||
- optional file artifacts remain for compatibility/debug (`${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`, `<repo>/logs/...`)
|
||||
- legacy JSON files are migrated to SQLite by startup migrations when present
|
||||
|
||||
Domain State DB (SQLite):
|
||||
|
||||
- `src/lib/db/domainState.ts` — CRUD operations for domain state
|
||||
- Tables (created in `src/lib/db/core.ts`): `domain_fallback_chains`, `domain_budgets`, `domain_cost_history`, `domain_lockout_state`, `domain_circuit_breakers`
|
||||
- Write-through cache pattern: in-memory Maps are authoritative at runtime; mutations are written synchronously to SQLite; state is restored from DB on cold start
|
||||
|
||||
## 4) Auth + Security Surfaces
|
||||
|
||||
- Dashboard cookie auth: `src/proxy.ts`, `src/app/api/auth/login/route.ts`
|
||||
- API key generation/verification: `src/shared/utils/apiKey.ts`
|
||||
- Provider secrets persisted in `providerConnections` entries
|
||||
- Outbound proxy support via `open-sse/utils/proxyFetch.ts` (env vars) and `open-sse/utils/networkProxy.ts` (configurable per-provider or global)
|
||||
|
||||
## 5) Cloud Sync
|
||||
|
||||
- Scheduler init: `src/lib/initCloudSync.ts`, `src/shared/services/initializeCloudSync.ts`
|
||||
- Periodic task: `src/shared/services/cloudSyncScheduler.ts`
|
||||
- Control route: `src/app/api/sync/cloud/route.ts`
|
||||
|
||||
## Request Lifecycle (`/v1/chat/completions`)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant Client as CLI/SDK Client
|
||||
participant Route as /api/v1/chat/completions
|
||||
participant Chat as src/sse/handlers/chat
|
||||
participant Core as open-sse/handlers/chatCore
|
||||
participant Model as Model Resolver
|
||||
participant Auth as Credential Selector
|
||||
participant Exec as Provider Executor
|
||||
participant Prov as Upstream Provider
|
||||
participant Stream as Stream Translator
|
||||
participant Usage as usageDb
|
||||
|
||||
Client->>Route: POST /v1/chat/completions
|
||||
Route->>Chat: handleChat(request)
|
||||
Chat->>Model: parse/resolve model or combo
|
||||
|
||||
alt Combo model
|
||||
Chat->>Chat: iterate combo models (handleComboChat)
|
||||
end
|
||||
|
||||
Chat->>Auth: getProviderCredentials(provider)
|
||||
Auth-->>Chat: active account + tokens/api key
|
||||
|
||||
Chat->>Core: handleChatCore(body, modelInfo, credentials)
|
||||
Core->>Core: detect source format
|
||||
Core->>Core: translate request to target format
|
||||
Core->>Exec: execute(provider, transformedBody)
|
||||
Exec->>Prov: upstream API call
|
||||
Prov-->>Exec: SSE/JSON response
|
||||
Exec-->>Core: response + metadata
|
||||
|
||||
alt 401/403
|
||||
Core->>Exec: refreshCredentials()
|
||||
Exec-->>Core: updated tokens
|
||||
Core->>Exec: retry request
|
||||
end
|
||||
|
||||
Core->>Stream: translate/normalize stream to client format
|
||||
Stream-->>Client: SSE chunks / JSON response
|
||||
|
||||
Stream->>Usage: extract usage + persist history/log
|
||||
```
|
||||
|
||||
## Combo + Account Fallback Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A[Incoming model string] --> B{Is combo name?}
|
||||
B -- Yes --> C[Load combo models sequence]
|
||||
B -- No --> D[Single model path]
|
||||
|
||||
C --> E[Try model N]
|
||||
E --> F[Resolve provider/model]
|
||||
D --> F
|
||||
|
||||
F --> G[Select account credentials]
|
||||
G --> H{Credentials available?}
|
||||
H -- No --> I[Return provider unavailable]
|
||||
H -- Yes --> J[Execute request]
|
||||
|
||||
J --> K{Success?}
|
||||
K -- Yes --> L[Return response]
|
||||
K -- No --> M{Fallback-eligible error?}
|
||||
|
||||
M -- No --> N[Return error]
|
||||
M -- Yes --> O[Mark account unavailable cooldown]
|
||||
O --> P{Another account for provider?}
|
||||
P -- Yes --> G
|
||||
P -- No --> Q{In combo with next model?}
|
||||
Q -- Yes --> E
|
||||
Q -- No --> R[Return all unavailable]
|
||||
```
|
||||
|
||||
Fallback decisions are driven by `open-sse/services/accountFallback.ts` using status codes and error-message heuristics.
|
||||
|
||||
## OAuth Onboarding and Token Refresh Lifecycle
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Dashboard UI
|
||||
participant OAuth as /api/oauth/[provider]/[action]
|
||||
participant ProvAuth as Provider Auth Server
|
||||
participant DB as localDb
|
||||
participant Test as /api/providers/[id]/test
|
||||
participant Exec as Provider Executor
|
||||
|
||||
UI->>OAuth: GET authorize or device-code
|
||||
OAuth->>ProvAuth: create auth/device flow
|
||||
ProvAuth-->>OAuth: auth URL or device code payload
|
||||
OAuth-->>UI: flow data
|
||||
|
||||
UI->>OAuth: POST exchange or poll
|
||||
OAuth->>ProvAuth: token exchange/poll
|
||||
ProvAuth-->>OAuth: access/refresh tokens
|
||||
OAuth->>DB: createProviderConnection(oauth data)
|
||||
OAuth-->>UI: success + connection id
|
||||
|
||||
UI->>Test: POST /api/providers/[id]/test
|
||||
Test->>Exec: validate credentials / optional refresh
|
||||
Exec-->>Test: valid or refreshed token info
|
||||
Test->>DB: update status/tokens/errors
|
||||
Test-->>UI: validation result
|
||||
```
|
||||
|
||||
Refresh during live traffic is executed inside `open-sse/handlers/chatCore.ts` via executor `refreshCredentials()`.
|
||||
|
||||
## Cloud Sync Lifecycle (Enable / Sync / Disable)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Endpoint Page UI
|
||||
participant Sync as /api/sync/cloud
|
||||
participant DB as localDb
|
||||
participant Cloud as External Cloud Sync
|
||||
participant Claude as ~/.claude/settings.json
|
||||
|
||||
UI->>Sync: POST action=enable
|
||||
Sync->>DB: set cloudEnabled=true
|
||||
Sync->>DB: ensure API key exists
|
||||
Sync->>Cloud: POST /sync/{machineId} (providers/aliases/combos/keys)
|
||||
Cloud-->>Sync: sync result
|
||||
Sync->>Cloud: GET /{machineId}/v1/verify
|
||||
Sync-->>UI: enabled + verification status
|
||||
|
||||
UI->>Sync: POST action=sync
|
||||
Sync->>Cloud: POST /sync/{machineId}
|
||||
Cloud-->>Sync: remote data
|
||||
Sync->>DB: update newer local tokens/status
|
||||
Sync-->>UI: synced
|
||||
|
||||
UI->>Sync: POST action=disable
|
||||
Sync->>DB: set cloudEnabled=false
|
||||
Sync->>Cloud: DELETE /sync/{machineId}
|
||||
Sync->>Claude: switch ANTHROPIC_BASE_URL back to local (if needed)
|
||||
Sync-->>UI: disabled
|
||||
```
|
||||
|
||||
Periodic sync is triggered by `CloudSyncScheduler` when cloud is enabled.
|
||||
|
||||
## Data Model and Storage Map
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
SETTINGS ||--o{ PROVIDER_CONNECTION : controls
|
||||
PROVIDER_NODE ||--o{ PROVIDER_CONNECTION : backs_compatible_provider
|
||||
PROVIDER_CONNECTION ||--o{ USAGE_ENTRY : emits_usage
|
||||
|
||||
SETTINGS {
|
||||
boolean cloudEnabled
|
||||
number stickyRoundRobinLimit
|
||||
boolean requireLogin
|
||||
string password_hash
|
||||
string fallbackStrategy
|
||||
json rateLimitDefaults
|
||||
json providerProfiles
|
||||
}
|
||||
|
||||
PROVIDER_CONNECTION {
|
||||
string id
|
||||
string provider
|
||||
string authType
|
||||
string name
|
||||
number priority
|
||||
boolean isActive
|
||||
string apiKey
|
||||
string accessToken
|
||||
string refreshToken
|
||||
string expiresAt
|
||||
string testStatus
|
||||
string lastError
|
||||
string rateLimitedUntil
|
||||
json providerSpecificData
|
||||
}
|
||||
|
||||
PROVIDER_NODE {
|
||||
string id
|
||||
string type
|
||||
string name
|
||||
string prefix
|
||||
string apiType
|
||||
string baseUrl
|
||||
}
|
||||
|
||||
MODEL_ALIAS {
|
||||
string alias
|
||||
string targetModel
|
||||
}
|
||||
|
||||
COMBO {
|
||||
string id
|
||||
string name
|
||||
string[] models
|
||||
}
|
||||
|
||||
API_KEY {
|
||||
string id
|
||||
string name
|
||||
string key
|
||||
string machineId
|
||||
}
|
||||
|
||||
USAGE_ENTRY {
|
||||
string provider
|
||||
string model
|
||||
number prompt_tokens
|
||||
number completion_tokens
|
||||
string connectionId
|
||||
string timestamp
|
||||
}
|
||||
|
||||
CUSTOM_MODEL {
|
||||
string id
|
||||
string name
|
||||
string providerId
|
||||
}
|
||||
|
||||
PROXY_CONFIG {
|
||||
string global
|
||||
json providers
|
||||
}
|
||||
|
||||
IP_FILTER {
|
||||
string mode
|
||||
string[] allowlist
|
||||
string[] blocklist
|
||||
}
|
||||
|
||||
THINKING_BUDGET {
|
||||
string mode
|
||||
number customBudget
|
||||
string effortLevel
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT {
|
||||
boolean enabled
|
||||
string prompt
|
||||
string position
|
||||
}
|
||||
```
|
||||
|
||||
Physical storage files:
|
||||
|
||||
- primary runtime DB: `${DATA_DIR}/storage.sqlite`
|
||||
- request log lines: `${DATA_DIR}/log.txt` (compat/debug artifact)
|
||||
- structured call payload archives: `${DATA_DIR}/call_logs/`
|
||||
- optional translator/request debug sessions: `<repo>/logs/...`
|
||||
|
||||
## Deployment Topology
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph LocalHost[Developer Host]
|
||||
CLI[CLI Tools]
|
||||
Browser[Dashboard Browser]
|
||||
end
|
||||
|
||||
subgraph ContainerOrProcess[OmniRoute Runtime]
|
||||
Next[Next.js Server\nPORT=20128]
|
||||
Core[SSE Core + Executors]
|
||||
MainDB[(storage.sqlite)]
|
||||
UsageDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph External[External Services]
|
||||
Providers[AI Providers]
|
||||
SyncCloud[Cloud Sync Service]
|
||||
end
|
||||
|
||||
CLI --> Next
|
||||
Browser --> Next
|
||||
Next --> Core
|
||||
Next --> MainDB
|
||||
Core --> MainDB
|
||||
Core --> UsageDB
|
||||
Core --> Providers
|
||||
Next --> SyncCloud
|
||||
```
|
||||
|
||||
## Module Mapping (Decision-Critical)
|
||||
|
||||
### Route and API Modules
|
||||
|
||||
- `src/app/api/v1/*`, `src/app/api/v1beta/*`: compatibility APIs
|
||||
- `src/app/api/v1/providers/[provider]/*`: dedicated per-provider routes (chat, embeddings, images)
|
||||
- `src/app/api/providers*`: provider CRUD, validation, testing
|
||||
- `src/app/api/provider-nodes*`: custom compatible node management
|
||||
- `src/app/api/provider-models`: custom model management (CRUD)
|
||||
- `src/app/api/models/route.ts`: model catalog API (aliases + custom models)
|
||||
- `src/app/api/oauth/*`: OAuth/device-code flows
|
||||
- `src/app/api/keys*`: local API key lifecycle
|
||||
- `src/app/api/models/alias`: alias management
|
||||
- `src/app/api/combos*`: fallback combo management
|
||||
- `src/app/api/pricing`: pricing overrides for cost calculation
|
||||
- `src/app/api/settings/proxy`: proxy configuration (GET/PUT/DELETE)
|
||||
- `src/app/api/settings/proxy/test`: outbound proxy connectivity test (POST)
|
||||
- `src/app/api/usage/*`: usage and logs APIs
|
||||
- `src/app/api/sync/*` + `src/app/api/cloud/*`: cloud sync and cloud-facing helpers
|
||||
- `src/app/api/cli-tools/*`: local CLI config writers/checkers
|
||||
- `src/app/api/settings/ip-filter`: IP allowlist/blocklist (GET/PUT)
|
||||
- `src/app/api/settings/thinking-budget`: thinking token budget config (GET/PUT)
|
||||
- `src/app/api/settings/system-prompt`: global system prompt (GET/PUT)
|
||||
- `src/app/api/sessions`: active session listing (GET)
|
||||
- `src/app/api/rate-limits`: per-account rate limit status (GET)
|
||||
|
||||
### Routing and Execution Core
|
||||
|
||||
- `src/sse/handlers/chat.ts`: request parse, combo handling, account selection loop
|
||||
- `open-sse/handlers/chatCore.ts`: translation, executor dispatch, retry/refresh handling, stream setup
|
||||
- `open-sse/executors/*`: provider-specific network and format behavior
|
||||
|
||||
### Translation Registry and Format Converters
|
||||
|
||||
- `open-sse/translator/index.ts`: translator registry and orchestration
|
||||
- Request translators: `open-sse/translator/request/*`
|
||||
- Response translators: `open-sse/translator/response/*`
|
||||
- Format constants: `open-sse/translator/formats.ts`
|
||||
|
||||
### Persistence
|
||||
|
||||
- `src/lib/db/*`: persistent config/state and domain persistence on SQLite
|
||||
- `src/lib/localDb.ts`: compatibility re-export for DB modules
|
||||
- `src/lib/usageDb.ts`: usage history/call logs facade on top of SQLite tables
|
||||
|
||||
## Provider Executor Coverage (Strategy Pattern)
|
||||
|
||||
Each provider has a specialized executor extending `BaseExecutor` (in `open-sse/executors/base.ts`), which provides URL building, header construction, retry with exponential backoff, credential refresh hooks, and the `execute()` orchestration method.
|
||||
|
||||
| Executor | Provider(s) | Special Handling |
|
||||
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------- |
|
||||
| `DefaultExecutor` | OpenAI, Claude, Gemini, Qwen, iFlow, OpenRouter, GLM, Kimi, MiniMax, DeepSeek, Groq, xAI, Mistral, Perplexity, Together, Fireworks, Cerebras, Cohere, NVIDIA | Dynamic URL/header config per provider |
|
||||
| `AntigravityExecutor` | Google Antigravity | Custom project/session IDs, Retry-After parsing |
|
||||
| `CodexExecutor` | OpenAI Codex | Injects system instructions, forces reasoning effort |
|
||||
| `CursorExecutor` | Cursor IDE | ConnectRPC protocol, Protobuf encoding, request signing via checksum |
|
||||
| `GithubExecutor` | GitHub Copilot | Copilot token refresh, VSCode-mimicking headers |
|
||||
| `KiroExecutor` | AWS CodeWhisperer/Kiro | AWS EventStream binary format → SSE conversion |
|
||||
| `GeminiCLIExecutor` | Gemini CLI | Google OAuth token refresh cycle |
|
||||
|
||||
All other providers (including custom compatible nodes) use the `DefaultExecutor`.
|
||||
|
||||
## Provider Compatibility Matrix
|
||||
|
||||
| Provider | Format | Auth | Stream | Non-Stream | Token Refresh | Usage API |
|
||||
| ---------------- | ---------------- | --------------------- | ---------------- | ---------- | ------------- | ------------------ |
|
||||
| Claude | claude | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Admin only |
|
||||
| Gemini | gemini | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Gemini CLI | gemini-cli | OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Antigravity | antigravity | OAuth | ✅ | ✅ | ✅ | ✅ Full quota API |
|
||||
| OpenAI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Codex | openai-responses | OAuth | ✅ forced | ❌ | ✅ | ✅ Rate limits |
|
||||
| GitHub Copilot | openai | OAuth + Copilot Token | ✅ | ✅ | ✅ | ✅ Quota snapshots |
|
||||
| Cursor | cursor | Custom checksum | ✅ | ✅ | ❌ | ❌ |
|
||||
| Kiro | kiro | AWS SSO OIDC | ✅ (EventStream) | ❌ | ✅ | ✅ Usage limits |
|
||||
| Qwen | openai | OAuth | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| iFlow | openai | OAuth (Basic) | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| OpenRouter | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| GLM/Kimi/MiniMax | claude | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| DeepSeek | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Groq | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| xAI (Grok) | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Mistral | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Perplexity | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Together AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Fireworks AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cerebras | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cohere | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| NVIDIA NIM | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
|
||||
## Format Translation Coverage
|
||||
|
||||
Detected source formats include:
|
||||
|
||||
- `openai`
|
||||
- `openai-responses`
|
||||
- `claude`
|
||||
- `gemini`
|
||||
|
||||
Target formats include:
|
||||
|
||||
- OpenAI chat/Responses
|
||||
- Claude
|
||||
- Gemini/Gemini-CLI/Antigravity envelope
|
||||
- Kiro
|
||||
- Cursor
|
||||
|
||||
Translations use **OpenAI as the hub format** — all conversions go through OpenAI as intermediate:
|
||||
|
||||
```
|
||||
Source Format → OpenAI (hub) → Target Format
|
||||
```
|
||||
|
||||
Translations are selected dynamically based on source payload shape and provider target format.
|
||||
|
||||
Additional processing layers in the translation pipeline:
|
||||
|
||||
- **Response sanitization** — Strips non-standard fields from OpenAI-format responses (both streaming and non-streaming) to ensure strict SDK compliance
|
||||
- **Role normalization** — Converts `developer` → `system` for non-OpenAI targets; merges `system` → `user` for models that reject the system role (GLM, ERNIE)
|
||||
- **Think tag extraction** — Parses `<think>...</think>` blocks from content into `reasoning_content` field
|
||||
- **Structured output** — Converts OpenAI `response_format.json_schema` to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
## Supported API Endpoints
|
||||
|
||||
| Endpoint | Format | Handler |
|
||||
| -------------------------------------------------- | ------------------ | ---------------------------------------------------- |
|
||||
| `POST /v1/chat/completions` | OpenAI Chat | `src/sse/handlers/chat.ts` |
|
||||
| `POST /v1/messages` | Claude Messages | Same handler (auto-detected) |
|
||||
| `POST /v1/responses` | OpenAI Responses | `open-sse/handlers/responsesHandler.ts` |
|
||||
| `POST /v1/embeddings` | OpenAI Embeddings | `open-sse/handlers/embeddings.ts` |
|
||||
| `GET /v1/embeddings` | Model listing | API route |
|
||||
| `POST /v1/images/generations` | OpenAI Images | `open-sse/handlers/imageGeneration.ts` |
|
||||
| `GET /v1/images/generations` | Model listing | API route |
|
||||
| `POST /v1/providers/{provider}/chat/completions` | OpenAI Chat | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/embeddings` | OpenAI Embeddings | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/images/generations` | OpenAI Images | Dedicated per-provider with model validation |
|
||||
| `POST /v1/messages/count_tokens` | Claude Token Count | API route |
|
||||
| `GET /v1/models` | OpenAI Models list | API route (chat + embedding + image + custom models) |
|
||||
| `GET /api/models/catalog` | Catalog | All models grouped by provider + type |
|
||||
| `POST /v1beta/models/*:streamGenerateContent` | Gemini native | API route |
|
||||
| `GET/PUT/DELETE /api/settings/proxy` | Proxy Config | Network proxy configuration |
|
||||
| `POST /api/settings/proxy/test` | Proxy Connectivity | Proxy health/connectivity test endpoint |
|
||||
| `GET/POST/DELETE /api/provider-models` | Custom Models | Custom model management per provider |
|
||||
|
||||
## Bypass Handler
|
||||
|
||||
The bypass handler (`open-sse/utils/bypassHandler.ts`) intercepts known "throwaway" requests from Claude CLI — warmup pings, title extractions, and token counts — and returns a **fake response** without consuming upstream provider tokens. This is triggered only when `User-Agent` contains `claude-cli`.
|
||||
|
||||
## Request Logger Pipeline
|
||||
|
||||
The request logger (`open-sse/utils/requestLogger.ts`) provides a 7-stage debug logging pipeline, disabled by default, enabled via `ENABLE_REQUEST_LOGS=true`:
|
||||
|
||||
```
|
||||
1_req_client.json → 2_req_source.json → 3_req_openai.json → 4_req_target.json
|
||||
→ 5_res_provider.txt → 6_res_openai.txt → 7_res_client.txt
|
||||
```
|
||||
|
||||
Files are written to `<repo>/logs/<session>/` for each request session.
|
||||
|
||||
## Failure Modes and Resilience
|
||||
|
||||
## 1) Account/Provider Availability
|
||||
|
||||
- provider account cooldown on transient/rate/auth errors
|
||||
- account fallback before failing request
|
||||
- combo model fallback when current model/provider path is exhausted
|
||||
|
||||
## 2) Token Expiry
|
||||
|
||||
- pre-check and refresh with retry for refreshable providers
|
||||
- 401/403 retry after refresh attempt in core path
|
||||
|
||||
## 3) Stream Safety
|
||||
|
||||
- disconnect-aware stream controller
|
||||
- translation stream with end-of-stream flush and `[DONE]` handling
|
||||
- usage estimation fallback when provider usage metadata is missing
|
||||
|
||||
## 4) Cloud Sync Degradation
|
||||
|
||||
- sync errors are surfaced but local runtime continues
|
||||
- scheduler has retry-capable logic, but periodic execution currently calls single-attempt sync by default
|
||||
|
||||
## 5) Data Integrity
|
||||
|
||||
- SQLite schema migrations and auto-upgrade hooks at startup
|
||||
- legacy JSON → SQLite migration compatibility path
|
||||
|
||||
## Observability and Operational Signals
|
||||
|
||||
Runtime visibility sources:
|
||||
|
||||
- console logs from `src/sse/utils/logger.ts`
|
||||
- per-request usage aggregates in SQLite (`usage_history`, `call_logs`, `proxy_logs`)
|
||||
- textual request status log in `log.txt` (optional/compat)
|
||||
- optional deep request/translation logs under `logs/` when `ENABLE_REQUEST_LOGS=true`
|
||||
- dashboard usage endpoints (`/api/usage/*`) for UI consumption
|
||||
|
||||
## Security-Sensitive Boundaries
|
||||
|
||||
- JWT secret (`JWT_SECRET`) secures dashboard session cookie verification/signing
|
||||
- Initial password bootstrap (`INITIAL_PASSWORD`) should be explicitly configured for first-run provisioning
|
||||
- API key HMAC secret (`API_KEY_SECRET`) secures generated local API key format
|
||||
- Provider secrets (API keys/tokens) are persisted in local DB and should be protected at filesystem level
|
||||
- Cloud sync endpoints rely on API key auth + machine id semantics
|
||||
|
||||
## Environment and Runtime Matrix
|
||||
|
||||
Environment variables actively used by code:
|
||||
|
||||
- App/auth: `JWT_SECRET`, `INITIAL_PASSWORD`
|
||||
- Storage: `DATA_DIR`
|
||||
- Compatible node behavior: `ALLOW_MULTI_CONNECTIONS_PER_COMPAT_NODE`
|
||||
- Optional storage base override (Linux/macOS when `DATA_DIR` unset): `XDG_CONFIG_HOME`
|
||||
- Security hashing: `API_KEY_SECRET`, `MACHINE_ID_SALT`
|
||||
- Logging: `ENABLE_REQUEST_LOGS`
|
||||
- Sync/cloud URLing: `NEXT_PUBLIC_BASE_URL`, `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Outbound proxy: `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY` and lowercase variants
|
||||
- SOCKS5 feature flags: `ENABLE_SOCKS5_PROXY`, `NEXT_PUBLIC_ENABLE_SOCKS5_PROXY`
|
||||
- Platform/runtime helpers (not app-specific config): `APPDATA`, `NODE_ENV`, `PORT`, `HOSTNAME`
|
||||
|
||||
## Known Architectural Notes
|
||||
|
||||
1. `usageDb` and `localDb` share the same base directory policy (`DATA_DIR` -> `XDG_CONFIG_HOME/omniroute` -> `~/.omniroute`) with legacy file migration.
|
||||
2. `/api/v1/route.ts` delegates to the same unified catalog builder used by `/api/v1/models` (`src/app/api/v1/models/catalog.ts`) to avoid semantic drift.
|
||||
3. Request logger writes full headers/body when enabled; treat log directory as sensitive.
|
||||
4. Cloud behavior depends on correct `NEXT_PUBLIC_BASE_URL` and cloud endpoint reachability.
|
||||
5. The `open-sse/` directory is published as the `@omniroute/open-sse` **npm workspace package**. Source code imports it via `@omniroute/open-sse/...` (resolved by Next.js `transpilePackages`). File paths in this document still use the directory name `open-sse/` for consistency.
|
||||
6. Charts in the dashboard use **Recharts** (SVG-based) for accessible, interactive analytics visualizations (model usage bar charts, provider breakdown tables with success rates).
|
||||
7. E2E tests use **Playwright** (`tests/e2e/`), run via `npm run test:e2e`. Unit tests use **Node.js test runner** (`tests/unit/`), run via `npm run test:unit`. Source code under `src/` is **TypeScript** (`.ts`/`.tsx`); the `open-sse/` workspace remains JavaScript (`.js`).
|
||||
8. Settings page is organized into 5 tabs: Security, Routing (6 global strategies: fill-first, round-robin, p2c, random, least-used, cost-optimized), Resilience (editable rate limits, circuit breaker, policies), AI (thinking budget, system prompt, prompt cache), Advanced (proxy).
|
||||
|
||||
## Operational Verification Checklist
|
||||
|
||||
- Build from source: `npm run build`
|
||||
- Build Docker image: `docker build -t omniroute .`
|
||||
- Start service and verify:
|
||||
- `GET /api/settings`
|
||||
- `GET /api/v1/models`
|
||||
- CLI target base URL should be `http://<host>:20128/v1` when `PORT=20128`
|
||||
@@ -0,0 +1,67 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/AUTO-COMBO.md) · 🇪🇸 [es](../es/AUTO-COMBO.md) · 🇫🇷 [fr](../fr/AUTO-COMBO.md) · 🇩🇪 [de](../de/AUTO-COMBO.md) · 🇮🇹 [it](../it/AUTO-COMBO.md) · 🇷🇺 [ru](../ru/AUTO-COMBO.md) · 🇨🇳 [zh-CN](../zh-CN/AUTO-COMBO.md) · 🇯🇵 [ja](../ja/AUTO-COMBO.md) · 🇰🇷 [ko](../ko/AUTO-COMBO.md) · 🇸🇦 [ar](../ar/AUTO-COMBO.md) · 🇮🇳 [in](../in/AUTO-COMBO.md) · 🇹🇭 [th](../th/AUTO-COMBO.md) · 🇻🇳 [vi](../vi/AUTO-COMBO.md) · 🇮🇩 [id](../id/AUTO-COMBO.md) · 🇲🇾 [ms](../ms/AUTO-COMBO.md) · 🇳🇱 [nl](../nl/AUTO-COMBO.md) · 🇵🇱 [pl](../pl/AUTO-COMBO.md) · 🇸🇪 [sv](../sv/AUTO-COMBO.md) · 🇳🇴 [no](../no/AUTO-COMBO.md) · 🇩🇰 [da](../da/AUTO-COMBO.md) · 🇫🇮 [fi](../fi/AUTO-COMBO.md) · 🇵🇹 [pt](../pt/AUTO-COMBO.md) · 🇷🇴 [ro](../ro/AUTO-COMBO.md) · 🇭🇺 [hu](../hu/AUTO-COMBO.md) · 🇧🇬 [bg](../bg/AUTO-COMBO.md) · 🇸🇰 [sk](../sk/AUTO-COMBO.md) · 🇺🇦 [uk-UA](../uk-UA/AUTO-COMBO.md) · 🇮🇱 [he](../he/AUTO-COMBO.md) · 🇵🇭 [phi](../phi/AUTO-COMBO.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Auto-Combo Engine
|
||||
|
||||
> Self-managing model chains with adaptive scoring
|
||||
|
||||
## How It Works
|
||||
|
||||
The Auto-Combo Engine dynamically selects the best provider/model for each request using a **6-factor scoring function**:
|
||||
|
||||
| Factor | Weight | Description |
|
||||
| :--------- | :----- | :---------------------------------------------- |
|
||||
| Quota | 0.20 | Remaining capacity [0..1] |
|
||||
| Health | 0.25 | Circuit breaker: CLOSED=1.0, HALF=0.5, OPEN=0.0 |
|
||||
| CostInv | 0.20 | Inverse cost (cheaper = higher score) |
|
||||
| LatencyInv | 0.15 | Inverse p95 latency (faster = higher) |
|
||||
| TaskFit | 0.10 | Model × task type fitness score |
|
||||
| Stability | 0.10 | Low variance in latency/errors |
|
||||
|
||||
## Mode Packs
|
||||
|
||||
| Pack | Focus | Key Weight |
|
||||
| :---------------------- | :----------- | :--------------- |
|
||||
| 🚀 **Ship Fast** | Speed | latencyInv: 0.35 |
|
||||
| 💰 **Cost Saver** | Economy | costInv: 0.40 |
|
||||
| 🎯 **Quality First** | Best model | taskFit: 0.40 |
|
||||
| 📡 **Offline Friendly** | Availability | quota: 0.40 |
|
||||
|
||||
## Self-Healing
|
||||
|
||||
- **Temporary exclusion**: Score < 0.2 → excluded for 5 min (progressive backoff, max 30 min)
|
||||
- **Circuit breaker awareness**: OPEN → auto-excluded; HALF_OPEN → probe requests
|
||||
- **Incident mode**: >50% OPEN → disable exploration, maximize stability
|
||||
- **Cooldown recovery**: After exclusion, first request is a "probe" with reduced timeout
|
||||
|
||||
## Bandit Exploration
|
||||
|
||||
5% of requests (configurable) are routed to random providers for exploration. Disabled in incident mode.
|
||||
|
||||
## API
|
||||
|
||||
```bash
|
||||
# Create auto-combo
|
||||
curl -X POST http://localhost:20128/api/combos/auto \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"id":"my-auto","name":"Auto Coder","candidatePool":["anthropic","google","openai"],"modePack":"ship-fast"}'
|
||||
|
||||
# List auto-combos
|
||||
curl http://localhost:20128/api/combos/auto
|
||||
```
|
||||
|
||||
## Task Fitness
|
||||
|
||||
30+ models scored across 6 task types (`coding`, `review`, `planning`, `analysis`, `debugging`, `documentation`). Supports wildcard patterns (e.g., `*-coder` → high coding score).
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------ |
|
||||
| `open-sse/services/autoCombo/scoring.ts` | Scoring function & pool normalization |
|
||||
| `open-sse/services/autoCombo/taskFitness.ts` | Model × task fitness lookup |
|
||||
| `open-sse/services/autoCombo/engine.ts` | Selection logic, bandit, budget cap |
|
||||
| `open-sse/services/autoCombo/selfHealing.ts` | Exclusion, probes, incident mode |
|
||||
| `open-sse/services/autoCombo/modePacks.ts` | 4 weight profiles |
|
||||
| `src/app/api/combos/auto/route.ts` | REST API |
|
||||
@@ -0,0 +1,593 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/CODEBASE_DOCUMENTATION.md) · 🇪🇸 [es](../es/CODEBASE_DOCUMENTATION.md) · 🇫🇷 [fr](../fr/CODEBASE_DOCUMENTATION.md) · 🇩🇪 [de](../de/CODEBASE_DOCUMENTATION.md) · 🇮🇹 [it](../it/CODEBASE_DOCUMENTATION.md) · 🇷🇺 [ru](../ru/CODEBASE_DOCUMENTATION.md) · 🇨🇳 [zh-CN](../zh-CN/CODEBASE_DOCUMENTATION.md) · 🇯🇵 [ja](../ja/CODEBASE_DOCUMENTATION.md) · 🇰🇷 [ko](../ko/CODEBASE_DOCUMENTATION.md) · 🇸🇦 [ar](../ar/CODEBASE_DOCUMENTATION.md) · 🇮🇳 [in](../in/CODEBASE_DOCUMENTATION.md) · 🇹🇭 [th](../th/CODEBASE_DOCUMENTATION.md) · 🇻🇳 [vi](../vi/CODEBASE_DOCUMENTATION.md) · 🇮🇩 [id](../id/CODEBASE_DOCUMENTATION.md) · 🇲🇾 [ms](../ms/CODEBASE_DOCUMENTATION.md) · 🇳🇱 [nl](../nl/CODEBASE_DOCUMENTATION.md) · 🇵🇱 [pl](../pl/CODEBASE_DOCUMENTATION.md) · 🇸🇪 [sv](../sv/CODEBASE_DOCUMENTATION.md) · 🇳🇴 [no](../no/CODEBASE_DOCUMENTATION.md) · 🇩🇰 [da](../da/CODEBASE_DOCUMENTATION.md) · 🇫🇮 [fi](../fi/CODEBASE_DOCUMENTATION.md) · 🇵🇹 [pt](../pt/CODEBASE_DOCUMENTATION.md) · 🇷🇴 [ro](../ro/CODEBASE_DOCUMENTATION.md) · 🇭🇺 [hu](../hu/CODEBASE_DOCUMENTATION.md) · 🇧🇬 [bg](../bg/CODEBASE_DOCUMENTATION.md) · 🇸🇰 [sk](../sk/CODEBASE_DOCUMENTATION.md) · 🇺🇦 [uk-UA](../uk-UA/CODEBASE_DOCUMENTATION.md) · 🇮🇱 [he](../he/CODEBASE_DOCUMENTATION.md) · 🇵🇭 [phi](../phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
---
|
||||
|
||||
# omniroute — Codebase Documentation
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](CODEBASE_DOCUMENTATION.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/CODEBASE_DOCUMENTATION.md) | 🇪🇸 [Español](i18n/es/CODEBASE_DOCUMENTATION.md) | 🇫🇷 [Français](i18n/fr/CODEBASE_DOCUMENTATION.md) | 🇮🇹 [Italiano](i18n/it/CODEBASE_DOCUMENTATION.md) | 🇷🇺 [Русский](i18n/ru/CODEBASE_DOCUMENTATION.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/CODEBASE_DOCUMENTATION.md) | 🇩🇪 [Deutsch](i18n/de/CODEBASE_DOCUMENTATION.md) | 🇮🇳 [हिन्दी](i18n/in/CODEBASE_DOCUMENTATION.md) | 🇹🇭 [ไทย](i18n/th/CODEBASE_DOCUMENTATION.md) | 🇺🇦 [Українська](i18n/uk-UA/CODEBASE_DOCUMENTATION.md) | 🇸🇦 [العربية](i18n/ar/CODEBASE_DOCUMENTATION.md) | 🇯🇵 [日本語](i18n/ja/CODEBASE_DOCUMENTATION.md) | 🇻🇳 [Tiếng Việt](i18n/vi/CODEBASE_DOCUMENTATION.md) | 🇧🇬 [Български](i18n/bg/CODEBASE_DOCUMENTATION.md) | 🇩🇰 [Dansk](i18n/da/CODEBASE_DOCUMENTATION.md) | 🇫🇮 [Suomi](i18n/fi/CODEBASE_DOCUMENTATION.md) | 🇮🇱 [עברית](i18n/he/CODEBASE_DOCUMENTATION.md) | 🇭🇺 [Magyar](i18n/hu/CODEBASE_DOCUMENTATION.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/CODEBASE_DOCUMENTATION.md) | 🇰🇷 [한국어](i18n/ko/CODEBASE_DOCUMENTATION.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/CODEBASE_DOCUMENTATION.md) | 🇳🇱 [Nederlands](i18n/nl/CODEBASE_DOCUMENTATION.md) | 🇳🇴 [Norsk](i18n/no/CODEBASE_DOCUMENTATION.md) | 🇵🇹 [Português (Portugal)](i18n/pt/CODEBASE_DOCUMENTATION.md) | 🇷🇴 [Română](i18n/ro/CODEBASE_DOCUMENTATION.md) | 🇵🇱 [Polski](i18n/pl/CODEBASE_DOCUMENTATION.md) | 🇸🇰 [Slovenčina](i18n/sk/CODEBASE_DOCUMENTATION.md) | 🇸🇪 [Svenska](i18n/sv/CODEBASE_DOCUMENTATION.md) | 🇵🇭 [Filipino](i18n/phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
> A comprehensive, beginner-friendly guide to the **omniroute** multi-provider AI proxy router.
|
||||
|
||||
---
|
||||
|
||||
## 1. What Is omniroute?
|
||||
|
||||
omniroute is a **proxy router** that sits between AI clients (Claude CLI, Codex, Cursor IDE, etc.) and AI providers (Anthropic, Google, OpenAI, AWS, GitHub, etc.). It solves one big problem:
|
||||
|
||||
> **Different AI clients speak different "languages" (API formats), and different AI providers expect different "languages" too.** omniroute translates between them automatically.
|
||||
|
||||
Think of it like a universal translator at the United Nations — any delegate can speak any language, and the translator converts it for any other delegate.
|
||||
|
||||
---
|
||||
|
||||
## 2. Architecture Overview
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph Clients
|
||||
A[Claude CLI]
|
||||
B[Codex]
|
||||
C[Cursor IDE]
|
||||
D[OpenAI-compatible]
|
||||
end
|
||||
|
||||
subgraph omniroute
|
||||
E[Handler Layer]
|
||||
F[Translator Layer]
|
||||
G[Executor Layer]
|
||||
H[Services Layer]
|
||||
end
|
||||
|
||||
subgraph Providers
|
||||
I[Anthropic Claude]
|
||||
J[Google Gemini]
|
||||
K[OpenAI / Codex]
|
||||
L[GitHub Copilot]
|
||||
M[AWS Kiro]
|
||||
N[Antigravity]
|
||||
O[Cursor API]
|
||||
end
|
||||
|
||||
A --> E
|
||||
B --> E
|
||||
C --> E
|
||||
D --> E
|
||||
E --> F
|
||||
F --> G
|
||||
G --> I
|
||||
G --> J
|
||||
G --> K
|
||||
G --> L
|
||||
G --> M
|
||||
G --> N
|
||||
G --> O
|
||||
H -.-> E
|
||||
H -.-> G
|
||||
```
|
||||
|
||||
### Core Principle: Hub-and-Spoke Translation
|
||||
|
||||
All format translation passes through **OpenAI format as the hub**:
|
||||
|
||||
```
|
||||
Client Format → [OpenAI Hub] → Provider Format (request)
|
||||
Provider Format → [OpenAI Hub] → Client Format (response)
|
||||
```
|
||||
|
||||
This means you only need **N translators** (one per format) instead of **N²** (every pair).
|
||||
|
||||
---
|
||||
|
||||
## 3. Project Structure
|
||||
|
||||
```
|
||||
omniroute/
|
||||
├── open-sse/ ← Core proxy library (portable, framework-agnostic)
|
||||
│ ├── index.js ← Main entry point, exports everything
|
||||
│ ├── config/ ← Configuration & constants
|
||||
│ ├── executors/ ← Provider-specific request execution
|
||||
│ ├── handlers/ ← Request handling orchestration
|
||||
│ ├── services/ ← Business logic (auth, models, fallback, usage)
|
||||
│ ├── translator/ ← Format translation engine
|
||||
│ │ ├── request/ ← Request translators (8 files)
|
||||
│ │ ├── response/ ← Response translators (7 files)
|
||||
│ │ └── helpers/ ← Shared translation utilities (6 files)
|
||||
│ └── utils/ ← Utility functions
|
||||
├── src/ ← Application layer (Express/Worker runtime)
|
||||
│ ├── app/ ← Web UI, API routes, middleware
|
||||
│ ├── lib/ ← Database, auth, and shared library code
|
||||
│ ├── mitm/ ← Man-in-the-middle proxy utilities
|
||||
│ ├── models/ ← Database models
|
||||
│ ├── shared/ ← Shared utilities (wrappers around open-sse)
|
||||
│ ├── sse/ ← SSE endpoint handlers
|
||||
│ └── store/ ← State management
|
||||
├── data/ ← Runtime data (credentials, logs)
|
||||
│ └── provider-credentials.json (external credentials override, gitignored)
|
||||
└── tester/ ← Test utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Module-by-Module Breakdown
|
||||
|
||||
### 4.1 Config (`open-sse/config/`)
|
||||
|
||||
The **single source of truth** for all provider configuration.
|
||||
|
||||
| File | Purpose |
|
||||
| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `constants.ts` | `PROVIDERS` object with base URLs, OAuth credentials (defaults), headers, and default system prompts for every provider. Also defines `HTTP_STATUS`, `ERROR_TYPES`, `COOLDOWN_MS`, `BACKOFF_CONFIG`, and `SKIP_PATTERNS`. |
|
||||
| `credentialLoader.ts` | Loads external credentials from `data/provider-credentials.json` and merges them over the hardcoded defaults in `PROVIDERS`. Keeps secrets out of source control while maintaining backwards compatibility. |
|
||||
| `providerModels.ts` | Central model registry: maps provider aliases → model IDs. Functions like `getModels()`, `getProviderByAlias()`. |
|
||||
| `codexInstructions.ts` | System instructions injected into Codex requests (editing constraints, sandbox rules, approval policies). |
|
||||
| `defaultThinkingSignature.ts` | Default "thinking" signatures for Claude and Gemini models. |
|
||||
| `ollamaModels.ts` | Schema definition for local Ollama models (name, size, family, quantization). |
|
||||
|
||||
#### Credential Loading Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["App starts"] --> B["constants.ts defines PROVIDERS\nwith hardcoded defaults"]
|
||||
B --> C{"data/provider-credentials.json\nexists?"}
|
||||
C -->|Yes| D["credentialLoader reads JSON"]
|
||||
C -->|No| E["Use hardcoded defaults"]
|
||||
D --> F{"For each provider in JSON"}
|
||||
F --> G{"Provider exists\nin PROVIDERS?"}
|
||||
G -->|No| H["Log warning, skip"]
|
||||
G -->|Yes| I{"Value is object?"}
|
||||
I -->|No| J["Log warning, skip"]
|
||||
I -->|Yes| K["Merge clientId, clientSecret,\ntokenUrl, authUrl, refreshUrl"]
|
||||
K --> F
|
||||
H --> F
|
||||
J --> F
|
||||
F -->|Done| L["PROVIDERS ready with\nmerged credentials"]
|
||||
E --> L
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 Executors (`open-sse/executors/`)
|
||||
|
||||
Executors encapsulate **provider-specific logic** using the **Strategy Pattern**. Each executor overrides base methods as needed.
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BaseExecutor {
|
||||
+buildUrl(model, stream, options)
|
||||
+buildHeaders(credentials, stream, body)
|
||||
+transformRequest(body, model, stream, credentials)
|
||||
+execute(url, options)
|
||||
+shouldRetry(status, error)
|
||||
+refreshCredentials(credentials, log)
|
||||
}
|
||||
|
||||
class DefaultExecutor {
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class AntigravityExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+shouldRetry()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class CursorExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseResponse()
|
||||
+generateChecksum()
|
||||
}
|
||||
|
||||
class KiroExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseEventStream()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
BaseExecutor <|-- DefaultExecutor
|
||||
BaseExecutor <|-- AntigravityExecutor
|
||||
BaseExecutor <|-- CursorExecutor
|
||||
BaseExecutor <|-- KiroExecutor
|
||||
BaseExecutor <|-- CodexExecutor
|
||||
BaseExecutor <|-- GeminiCLIExecutor
|
||||
BaseExecutor <|-- GithubExecutor
|
||||
```
|
||||
|
||||
| Executor | Provider | Key Specializations |
|
||||
| ---------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base.ts` | — | Abstract base: URL building, headers, retry logic, credential refresh |
|
||||
| `default.ts` | Claude, Gemini, OpenAI, GLM, Kimi, MiniMax | Generic OAuth token refresh for standard providers |
|
||||
| `antigravity.ts` | Google Cloud Code | Project/session ID generation, multi-URL fallback, custom retry parsing from error messages ("reset after 2h7m23s") |
|
||||
| `cursor.ts` | Cursor IDE | **Most complex**: SHA-256 checksum auth, Protobuf request encoding, binary EventStream → SSE response parsing |
|
||||
| `codex.ts` | OpenAI Codex | Injects system instructions, manages thinking levels, removes unsupported parameters |
|
||||
| `gemini-cli.ts` | Google Gemini CLI | Custom URL building (`streamGenerateContent`), Google OAuth token refresh |
|
||||
| `github.ts` | GitHub Copilot | Dual token system (GitHub OAuth + Copilot token), VSCode header mimicking |
|
||||
| `kiro.ts` | AWS CodeWhisperer | AWS EventStream binary parsing, AMZN event frames, token estimation |
|
||||
| `index.ts` | — | Factory: maps provider name → executor class, with default fallback |
|
||||
|
||||
---
|
||||
|
||||
### 4.3 Handlers (`open-sse/handlers/`)
|
||||
|
||||
The **orchestration layer** — coordinates translation, execution, streaming, and error handling.
|
||||
|
||||
| File | Purpose |
|
||||
| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `chatCore.ts` | **Central orchestrator** (~600 lines). Handles the complete request lifecycle: format detection → translation → executor dispatch → streaming/non-streaming response → token refresh → error handling → usage logging. |
|
||||
| `responsesHandler.ts` | Adapter for OpenAI's Responses API: converts Responses format → Chat Completions → sends to `chatCore` → converts SSE back to Responses format. |
|
||||
| `embeddings.ts` | Embedding generation handler: resolves embedding model → provider, dispatches to provider API, returns OpenAI-compatible embedding response. Supports 6+ providers. |
|
||||
| `imageGeneration.ts` | Image generation handler: resolves image model → provider, supports OpenAI-compatible, Gemini-image (Antigravity), and fallback (Nebius) modes. Returns base64 or URL images. |
|
||||
|
||||
#### Request Lifecycle (chatCore.ts)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant chatCore
|
||||
participant Translator
|
||||
participant Executor
|
||||
participant Provider
|
||||
|
||||
Client->>chatCore: Request (any format)
|
||||
chatCore->>chatCore: Detect source format
|
||||
chatCore->>chatCore: Check bypass patterns
|
||||
chatCore->>chatCore: Resolve model & provider
|
||||
chatCore->>Translator: Translate request (source → OpenAI → target)
|
||||
chatCore->>Executor: Get executor for provider
|
||||
Executor->>Executor: Build URL, headers, transform request
|
||||
Executor->>Executor: Refresh credentials if needed
|
||||
Executor->>Provider: HTTP fetch (streaming or non-streaming)
|
||||
|
||||
alt Streaming
|
||||
Provider-->>chatCore: SSE stream
|
||||
chatCore->>chatCore: Pipe through SSE transform stream
|
||||
Note over chatCore: Transform stream translates<br/>each chunk: target → OpenAI → source
|
||||
chatCore-->>Client: Translated SSE stream
|
||||
else Non-streaming
|
||||
Provider-->>chatCore: JSON response
|
||||
chatCore->>Translator: Translate response
|
||||
chatCore-->>Client: Translated JSON
|
||||
end
|
||||
|
||||
alt Error (401, 429, 500...)
|
||||
chatCore->>Executor: Retry with credential refresh
|
||||
chatCore->>chatCore: Account fallback logic
|
||||
end
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.4 Services (`open-sse/services/`)
|
||||
|
||||
Business logic that supports the handlers and executors.
|
||||
|
||||
| File | Purpose |
|
||||
| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `provider.ts` | **Format detection** (`detectFormat`): analyzes request body structure to identify Claude/OpenAI/Gemini/Antigravity/Responses formats (includes `max_tokens` heuristic for Claude). Also: URL building, header building, thinking config normalization. Supports `openai-compatible-*` and `anthropic-compatible-*` dynamic providers. |
|
||||
| `model.ts` | Model string parsing (`claude/model-name` → `{provider: "claude", model: "model-name"}`), alias resolution with collision detection, input sanitization (rejects path traversal/control chars), and model info resolution with async alias getter support. |
|
||||
| `accountFallback.ts` | Rate-limit handling: exponential backoff (1s → 2s → 4s → max 2min), account cooldown management, error classification (which errors trigger fallback vs. not). |
|
||||
| `tokenRefresh.ts` | OAuth token refresh for **every provider**: Google (Gemini, Antigravity), Claude, Codex, Qwen, iFlow, GitHub (OAuth + Copilot dual-token), Kiro (AWS SSO OIDC + Social Auth). Includes in-flight promise deduplication cache and retry with exponential backoff. |
|
||||
| `combo.ts` | **Combo models**: chains of fallback models. If model A fails with a fallback-eligible error, try model B, then C, etc. Returns actual upstream status codes. |
|
||||
| `usage.ts` | Fetches quota/usage data from provider APIs (GitHub Copilot quotas, Antigravity model quotas, Codex rate limits, Kiro usage breakdowns, Claude settings). |
|
||||
| `accountSelector.ts` | Smart account selection with scoring algorithm: considers priority, health status, round-robin position, and cooldown state to pick the optimal account for each request. |
|
||||
| `contextManager.ts` | Request context lifecycle management: creates and tracks per-request context objects with metadata (request ID, timestamps, provider info) for debugging and logging. |
|
||||
| `ipFilter.ts` | IP-based access control: supports allowlist and blocklist modes. Validates client IP against configured rules before processing API requests. |
|
||||
| `sessionManager.ts` | Session tracking with client fingerprinting: tracks active sessions using hashed client identifiers, monitors request counts, and provides session metrics. |
|
||||
| `signatureCache.ts` | Request signature-based deduplication cache: prevents duplicate requests by caching recent request signatures and returning cached responses for identical requests within a time window. |
|
||||
| `systemPrompt.ts` | Global system prompt injection: prepends or appends a configurable system prompt to all requests, with per-provider compatibility handling. |
|
||||
| `thinkingBudget.ts` | Reasoning token budget management: supports passthrough, auto (strip thinking config), custom (fixed budget), and adaptive (complexity-scaled) modes for controlling thinking/reasoning tokens. |
|
||||
| `wildcardRouter.ts` | Wildcard model pattern routing: resolves wildcard patterns (e.g., `*/claude-*`) to concrete provider/model pairs based on availability and priority. |
|
||||
|
||||
#### Token Refresh Deduplication
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant R1 as Request 1
|
||||
participant R2 as Request 2
|
||||
participant Cache as refreshPromiseCache
|
||||
participant OAuth as OAuth Provider
|
||||
|
||||
R1->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: No in-flight promise
|
||||
Cache->>OAuth: Start refresh
|
||||
R2->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: Found in-flight promise
|
||||
Cache-->>R2: Return existing promise
|
||||
OAuth-->>Cache: New access token
|
||||
Cache-->>R1: New access token
|
||||
Cache-->>R2: Same access token (shared)
|
||||
Cache->>Cache: Delete cache entry
|
||||
```
|
||||
|
||||
#### Account Fallback State Machine
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Active
|
||||
Active --> Error: Request fails (401/429/500)
|
||||
Error --> Cooldown: Apply backoff
|
||||
Cooldown --> Active: Cooldown expires
|
||||
Active --> Active: Request succeeds (reset backoff)
|
||||
|
||||
state Error {
|
||||
[*] --> ClassifyError
|
||||
ClassifyError --> ShouldFallback: Rate limit / Auth / Transient
|
||||
ClassifyError --> NoFallback: 400 Bad Request
|
||||
}
|
||||
|
||||
state Cooldown {
|
||||
[*] --> ExponentialBackoff
|
||||
ExponentialBackoff: Level 0 = 1s
|
||||
ExponentialBackoff: Level 1 = 2s
|
||||
ExponentialBackoff: Level 2 = 4s
|
||||
ExponentialBackoff: Max = 2min
|
||||
}
|
||||
```
|
||||
|
||||
#### Combo Model Chain
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Request with\ncombo model"] --> B["Model A"]
|
||||
B -->|"2xx Success"| C["Return response"]
|
||||
B -->|"429/401/500"| D{"Fallback\neligible?"}
|
||||
D -->|Yes| E["Model B"]
|
||||
D -->|No| F["Return error"]
|
||||
E -->|"2xx Success"| C
|
||||
E -->|"429/401/500"| G{"Fallback\neligible?"}
|
||||
G -->|Yes| H["Model C"]
|
||||
G -->|No| F
|
||||
H -->|"2xx Success"| C
|
||||
H -->|"Fail"| I["All failed →\nReturn last status"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.5 Translator (`open-sse/translator/`)
|
||||
|
||||
The **format translation engine** using a self-registering plugin system.
|
||||
|
||||
#### Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "Request Translation"
|
||||
A["Claude → OpenAI"]
|
||||
B["Gemini → OpenAI"]
|
||||
C["Antigravity → OpenAI"]
|
||||
D["OpenAI Responses → OpenAI"]
|
||||
E["OpenAI → Claude"]
|
||||
F["OpenAI → Gemini"]
|
||||
G["OpenAI → Kiro"]
|
||||
H["OpenAI → Cursor"]
|
||||
end
|
||||
|
||||
subgraph "Response Translation"
|
||||
I["Claude → OpenAI"]
|
||||
J["Gemini → OpenAI"]
|
||||
K["Kiro → OpenAI"]
|
||||
L["Cursor → OpenAI"]
|
||||
M["OpenAI → Claude"]
|
||||
N["OpenAI → Antigravity"]
|
||||
O["OpenAI → Responses"]
|
||||
end
|
||||
```
|
||||
|
||||
| Directory | Files | Description |
|
||||
| ------------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `request/` | 8 translators | Convert request bodies between formats. Each file self-registers via `register(from, to, fn)` on import. |
|
||||
| `response/` | 7 translators | Convert streaming response chunks between formats. Handles SSE event types, thinking blocks, tool calls. |
|
||||
| `helpers/` | 6 helpers | Shared utilities: `claudeHelper` (system prompt extraction, thinking config), `geminiHelper` (parts/contents mapping), `openaiHelper` (format filtering), `toolCallHelper` (ID generation, missing response injection), `maxTokensHelper`, `responsesApiHelper`. |
|
||||
| `index.ts` | — | Translation engine: `translateRequest()`, `translateResponse()`, state management, registry. |
|
||||
| `formats.ts` | — | Format constants: `OPENAI`, `CLAUDE`, `GEMINI`, `ANTIGRAVITY`, `KIRO`, `CURSOR`, `OPENAI_RESPONSES`. |
|
||||
|
||||
#### Key Design: Self-Registering Plugins
|
||||
|
||||
```javascript
|
||||
// Each translator file calls register() on import:
|
||||
import { register } from "../index.js";
|
||||
register("claude", "openai", translateClaudeToOpenAI);
|
||||
|
||||
// The index.js imports all translator files, triggering registration:
|
||||
import "./request/claude-to-openai.js"; // ← self-registers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.6 Utils (`open-sse/utils/`)
|
||||
|
||||
| File | Purpose |
|
||||
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `error.ts` | Error response building (OpenAI-compatible format), upstream error parsing, Antigravity retry-time extraction from error messages, SSE error streaming. |
|
||||
| `stream.ts` | **SSE Transform Stream** — the core streaming pipeline. Two modes: `TRANSLATE` (full format translation) and `PASSTHROUGH` (normalize + extract usage). Handles chunk buffering, usage estimation, content length tracking. Per-stream encoder/decoder instances avoid shared state. |
|
||||
| `streamHelpers.ts` | Low-level SSE utilities: `parseSSELine` (whitespace-tolerant), `hasValuableContent` (filters empty chunks for OpenAI/Claude/Gemini), `fixInvalidId`, `formatSSE` (format-aware SSE serialization with `perf_metrics` cleanup). |
|
||||
| `usageTracking.ts` | Token usage extraction from any format (Claude/OpenAI/Gemini/Responses), estimation with separate tool/message char-per-token ratios, buffer addition (2000 tokens safety margin), format-specific field filtering, console logging with ANSI colors. |
|
||||
| `requestLogger.ts` | File-based request logging (opt-in via `ENABLE_REQUEST_LOGS=true`). Creates session folders with numbered files: `1_req_client.json` → `7_res_client.txt`. All I/O is async (fire-and-forget). Masks sensitive headers. |
|
||||
| `bypassHandler.ts` | Intercepts specific patterns from Claude CLI (title extraction, warmup, count) and returns fake responses without calling any provider. Supports both streaming and non-streaming. Intentionally limited to Claude CLI scope. |
|
||||
| `networkProxy.ts` | Resolves outbound proxy URL for a given provider with precedence: provider-specific config → global config → environment variables (`HTTPS_PROXY`/`HTTP_PROXY`/`ALL_PROXY`). Supports `NO_PROXY` exclusions. Caches config for 30s. |
|
||||
|
||||
#### SSE Streaming Pipeline
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["Provider SSE stream"] --> B["TextDecoder\n(per-stream instance)"]
|
||||
B --> C["Buffer lines\n(split on newline)"]
|
||||
C --> D["parseSSELine()\n(trim whitespace, parse JSON)"]
|
||||
D --> E{"Mode?"}
|
||||
E -->|TRANSLATE| F["translateResponse()\ntarget → OpenAI → source"]
|
||||
E -->|PASSTHROUGH| G["fixInvalidId()\nnormalize chunk"]
|
||||
F --> H["hasValuableContent()\nfilter empty chunks"]
|
||||
G --> H
|
||||
H -->|"Has content"| I["extractUsage()\ntrack token counts"]
|
||||
H -->|"Empty"| J["Skip chunk"]
|
||||
I --> K["formatSSE()\nserialize + clean perf_metrics"]
|
||||
K --> L["TextEncoder\n(per-stream instance)"]
|
||||
L --> M["Enqueue to\nclient stream"]
|
||||
|
||||
style A fill:#f9f,stroke:#333
|
||||
style M fill:#9f9,stroke:#333
|
||||
```
|
||||
|
||||
#### Request Logger Session Structure
|
||||
|
||||
```
|
||||
logs/
|
||||
└── claude_gemini_claude-sonnet_20260208_143045/
|
||||
├── 1_req_client.json ← Raw client request
|
||||
├── 2_req_source.json ← After initial conversion
|
||||
├── 3_req_openai.json ← OpenAI intermediate format
|
||||
├── 4_req_target.json ← Final target format
|
||||
├── 5_res_provider.txt ← Provider SSE chunks (streaming)
|
||||
├── 5_res_provider.json ← Provider response (non-streaming)
|
||||
├── 6_res_openai.txt ← OpenAI intermediate chunks
|
||||
├── 7_res_client.txt ← Client-facing SSE chunks
|
||||
└── 6_error.json ← Error details (if any)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.7 Application Layer (`src/`)
|
||||
|
||||
| Directory | Purpose |
|
||||
| ------------- | ---------------------------------------------------------------------- |
|
||||
| `src/app/` | Web UI, API routes, Express middleware, OAuth callback handlers |
|
||||
| `src/lib/` | Database access (`localDb.ts`, `usageDb.ts`), authentication, shared |
|
||||
| `src/mitm/` | Man-in-the-middle proxy utilities for intercepting provider traffic |
|
||||
| `src/models/` | Database model definitions |
|
||||
| `src/shared/` | Wrappers around open-sse functions (provider, stream, error, etc.) |
|
||||
| `src/sse/` | SSE endpoint handlers that wire the open-sse library to Express routes |
|
||||
| `src/store/` | Application state management |
|
||||
|
||||
#### Notable API Routes
|
||||
|
||||
| Route | Methods | Purpose |
|
||||
| --------------------------------------------- | --------------- | ------------------------------------------------------------------------------------- |
|
||||
| `/api/provider-models` | GET/POST/DELETE | CRUD for custom models per provider |
|
||||
| `/api/models/catalog` | GET | Aggregated catalog of all models (chat, embedding, image, custom) grouped by provider |
|
||||
| `/api/settings/proxy` | GET/PUT/DELETE | Hierarchical outbound proxy configuration (`global/providers/combos/keys`) |
|
||||
| `/api/settings/proxy/test` | POST | Validates proxy connectivity and returns public IP/latency |
|
||||
| `/v1/providers/[provider]/chat/completions` | POST | Dedicated per-provider chat completions with model validation |
|
||||
| `/v1/providers/[provider]/embeddings` | POST | Dedicated per-provider embeddings with model validation |
|
||||
| `/v1/providers/[provider]/images/generations` | POST | Dedicated per-provider image generation with model validation |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist management |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget configuration (passthrough/auto/custom/adaptive) |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt injection for all requests |
|
||||
| `/api/sessions` | GET | Active session tracking and metrics |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
|
||||
---
|
||||
|
||||
## 5. Key Design Patterns
|
||||
|
||||
### 5.1 Hub-and-Spoke Translation
|
||||
|
||||
All formats translate through **OpenAI format as the hub**. Adding a new provider only requires writing **one pair** of translators (to/from OpenAI), not N pairs.
|
||||
|
||||
### 5.2 Executor Strategy Pattern
|
||||
|
||||
Each provider has a dedicated executor class inheriting from `BaseExecutor`. The factory in `executors/index.ts` selects the right one at runtime.
|
||||
|
||||
### 5.3 Self-Registering Plugin System
|
||||
|
||||
Translator modules register themselves on import via `register()`. Adding a new translator is just creating a file and importing it.
|
||||
|
||||
### 5.4 Account Fallback with Exponential Backoff
|
||||
|
||||
When a provider returns 429/401/500, the system can switch to the next account, applying exponential cooldowns (1s → 2s → 4s → max 2min).
|
||||
|
||||
### 5.5 Combo Model Chains
|
||||
|
||||
A "combo" groups multiple `provider/model` strings. If the first fails, fallback to the next automatically.
|
||||
|
||||
### 5.6 Stateful Streaming Translation
|
||||
|
||||
Response translation maintains state across SSE chunks (thinking block tracking, tool call accumulation, content block indexing) via the `initState()` mechanism.
|
||||
|
||||
### 5.7 Usage Safety Buffer
|
||||
|
||||
A 2000-token buffer is added to reported usage to prevent clients from hitting context window limits due to overhead from system prompts and format translation.
|
||||
|
||||
---
|
||||
|
||||
## 6. Supported Formats
|
||||
|
||||
| Format | Direction | Identifier |
|
||||
| ----------------------- | --------------- | ------------------ |
|
||||
| OpenAI Chat Completions | source + target | `openai` |
|
||||
| OpenAI Responses API | source + target | `openai-responses` |
|
||||
| Anthropic Claude | source + target | `claude` |
|
||||
| Google Gemini | source + target | `gemini` |
|
||||
| Google Gemini CLI | target only | `gemini-cli` |
|
||||
| Antigravity | source + target | `antigravity` |
|
||||
| AWS Kiro | target only | `kiro` |
|
||||
| Cursor | target only | `cursor` |
|
||||
|
||||
---
|
||||
|
||||
## 7. Supported Providers
|
||||
|
||||
| Provider | Auth Method | Executor | Key Notes |
|
||||
| ------------------------ | ---------------------- | ----------- | --------------------------------------------- |
|
||||
| Anthropic Claude | API key or OAuth | Default | Uses `x-api-key` header |
|
||||
| Google Gemini | API key or OAuth | Default | Uses `x-goog-api-key` header |
|
||||
| Google Gemini CLI | OAuth | GeminiCLI | Uses `streamGenerateContent` endpoint |
|
||||
| Antigravity | OAuth | Antigravity | Multi-URL fallback, custom retry parsing |
|
||||
| OpenAI | API key | Default | Standard Bearer auth |
|
||||
| Codex | OAuth | Codex | Injects system instructions, manages thinking |
|
||||
| GitHub Copilot | OAuth + Copilot token | Github | Dual token, VSCode header mimicking |
|
||||
| Kiro (AWS) | AWS SSO OIDC or Social | Kiro | Binary EventStream parsing |
|
||||
| Cursor IDE | Checksum auth | Cursor | Protobuf encoding, SHA-256 checksums |
|
||||
| Qwen | OAuth | Default | Standard auth |
|
||||
| iFlow | OAuth (Basic + Bearer) | Default | Dual auth header |
|
||||
| OpenRouter | API key | Default | Standard Bearer auth |
|
||||
| GLM, Kimi, MiniMax | API key | Default | Claude-compatible, use `x-api-key` |
|
||||
| `openai-compatible-*` | API key | Default | Dynamic: any OpenAI-compatible endpoint |
|
||||
| `anthropic-compatible-*` | API key | Default | Dynamic: any Claude-compatible endpoint |
|
||||
|
||||
---
|
||||
|
||||
## 8. Data Flow Summary
|
||||
|
||||
### Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor\nbuildUrl + buildHeaders"]
|
||||
D --> E["fetch(providerURL)"]
|
||||
E --> F["createSSEStream()\nTRANSLATE mode"]
|
||||
F --> G["parseSSELine()"]
|
||||
G --> H["translateResponse()\ntarget → OpenAI → source"]
|
||||
H --> I["extractUsage()\n+ addBuffer"]
|
||||
I --> J["formatSSE()"]
|
||||
J --> K["Client receives\ntranslated SSE"]
|
||||
K --> L["logUsage()\nsaveRequestUsage()"]
|
||||
```
|
||||
|
||||
### Non-Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor.execute()"]
|
||||
D --> E["translateResponse()\ntarget → OpenAI → source"]
|
||||
E --> F["Return JSON\nresponse"]
|
||||
```
|
||||
|
||||
### Bypass Flow (Claude CLI)
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Claude CLI request"] --> B{"Match bypass\npattern?"}
|
||||
B -->|"Title/Warmup/Count"| C["Generate fake\nOpenAI response"]
|
||||
B -->|"No match"| D["Normal flow"]
|
||||
C --> E["Translate to\nsource format"]
|
||||
E --> F["Return without\ncalling provider"]
|
||||
```
|
||||
@@ -0,0 +1,148 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/FEATURES.md) · 🇪🇸 [es](../es/FEATURES.md) · 🇫🇷 [fr](../fr/FEATURES.md) · 🇩🇪 [de](../de/FEATURES.md) · 🇮🇹 [it](../it/FEATURES.md) · 🇷🇺 [ru](../ru/FEATURES.md) · 🇨🇳 [zh-CN](../zh-CN/FEATURES.md) · 🇯🇵 [ja](../ja/FEATURES.md) · 🇰🇷 [ko](../ko/FEATURES.md) · 🇸🇦 [ar](../ar/FEATURES.md) · 🇮🇳 [in](../in/FEATURES.md) · 🇹🇭 [th](../th/FEATURES.md) · 🇻🇳 [vi](../vi/FEATURES.md) · 🇮🇩 [id](../id/FEATURES.md) · 🇲🇾 [ms](../ms/FEATURES.md) · 🇳🇱 [nl](../nl/FEATURES.md) · 🇵🇱 [pl](../pl/FEATURES.md) · 🇸🇪 [sv](../sv/FEATURES.md) · 🇳🇴 [no](../no/FEATURES.md) · 🇩🇰 [da](../da/FEATURES.md) · 🇫🇮 [fi](../fi/FEATURES.md) · 🇵🇹 [pt](../pt/FEATURES.md) · 🇷🇴 [ro](../ro/FEATURES.md) · 🇭🇺 [hu](../hu/FEATURES.md) · 🇧🇬 [bg](../bg/FEATURES.md) · 🇸🇰 [sk](../sk/FEATURES.md) · 🇺🇦 [uk-UA](../uk-UA/FEATURES.md) · 🇮🇱 [he](../he/FEATURES.md) · 🇵🇭 [phi](../phi/FEATURES.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Dashboard Features Gallery
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](FEATURES.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/FEATURES.md) | 🇪🇸 [Español](i18n/es/FEATURES.md) | 🇫🇷 [Français](i18n/fr/FEATURES.md) | 🇮🇹 [Italiano](i18n/it/FEATURES.md) | 🇷🇺 [Русский](i18n/ru/FEATURES.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/FEATURES.md) | 🇩🇪 [Deutsch](i18n/de/FEATURES.md) | 🇮🇳 [हिन्दी](i18n/in/FEATURES.md) | 🇹🇭 [ไทย](i18n/th/FEATURES.md) | 🇺🇦 [Українська](i18n/uk-UA/FEATURES.md) | 🇸🇦 [العربية](i18n/ar/FEATURES.md) | 🇯🇵 [日本語](i18n/ja/FEATURES.md) | 🇻🇳 [Tiếng Việt](i18n/vi/FEATURES.md) | 🇧🇬 [Български](i18n/bg/FEATURES.md) | 🇩🇰 [Dansk](i18n/da/FEATURES.md) | 🇫🇮 [Suomi](i18n/fi/FEATURES.md) | 🇮🇱 [עברית](i18n/he/FEATURES.md) | 🇭🇺 [Magyar](i18n/hu/FEATURES.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/FEATURES.md) | 🇰🇷 [한국어](i18n/ko/FEATURES.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/FEATURES.md) | 🇳🇱 [Nederlands](i18n/nl/FEATURES.md) | 🇳🇴 [Norsk](i18n/no/FEATURES.md) | 🇵🇹 [Português (Portugal)](i18n/pt/FEATURES.md) | 🇷🇴 [Română](i18n/ro/FEATURES.md) | 🇵🇱 [Polski](i18n/pl/FEATURES.md) | 🇸🇰 [Slovenčina](i18n/sk/FEATURES.md) | 🇸🇪 [Svenska](i18n/sv/FEATURES.md) | 🇵🇭 [Filipino](i18n/phi/FEATURES.md)
|
||||
|
||||
Visual guide to every section of the OmniRoute dashboard.
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Providers
|
||||
|
||||
Manage AI provider connections: OAuth providers (Claude Code, Codex, Gemini CLI), API key providers (Groq, DeepSeek, OpenRouter), and free providers (iFlow, Qwen, Kiro).
|
||||
|
||||
- **Ollama Cloud** — Cloud-hosted Ollama models at `api.ollama.com` (free "Light usage" tier); use `ollamacloud/<model>` prefix
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
Create model routing combos with 6 strategies: priority, weighted, round-robin, random, least-used, and cost-optimized. Each combo chains multiple models with automatic fallback and includes quick templates and readiness checks.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 📊 Analytics
|
||||
|
||||
Comprehensive usage analytics with token consumption, cost estimates, activity heatmaps, weekly distribution charts, and per-provider breakdowns.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🏥 System Health
|
||||
|
||||
Real-time monitoring: uptime, memory, version, latency percentiles (p50/p95/p99), cache statistics, and provider circuit breaker states.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 Translator Playground
|
||||
|
||||
Four modes for debugging API translations: **Playground** (format converter), **Chat Tester** (live requests), **Test Bench** (batch tests), and **Live Monitor** (real-time stream).
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎮 Model Playground _(v2.0.9+)_
|
||||
|
||||
Test any model directly from the dashboard. Select provider, model, and endpoint, write prompts with Monaco Editor, stream responses in real-time, abort mid-stream, and view timing metrics.
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Themes _(v2.0.5+)_
|
||||
|
||||
Customizable color themes for the entire dashboard. Choose from 7 preset colors (Coral, Blue, Red, Green, Violet, Orange, Cyan) or create a custom theme by picking any hex color. Supports light, dark, and system mode.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Settings
|
||||
|
||||
Comprehensive settings panel with tabs:
|
||||
|
||||
- **General** — System storage, backup management (export/import database)
|
||||
- **Appearance** — Theme selector (dark/light/system), color theme presets and custom colors, health log visibility
|
||||
- **Security** — API endpoint protection, custom provider blocking, IP filtering, session info
|
||||
- **Routing** — Model aliases, background task degradation
|
||||
- **Resilience** — Rate limit persistence, circuit breaker tuning
|
||||
- **Advanced** — Configuration overrides
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Tools
|
||||
|
||||
One-click configuration for AI coding tools: Claude Code, Codex CLI, Gemini CLI, OpenClaw, Kilo Code, Antigravity, Cline, Continue, Cursor, and Factory Droid. Features automated config apply/reset, connection profiles, and model mapping.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🤖 CLI Agents _(v2.0.11+)_
|
||||
|
||||
Dashboard for discovering and managing CLI agents. Shows a grid of 14 built-in agents (Codex, Claude, Goose, Gemini CLI, OpenClaw, Aider, OpenCode, Cline, Qwen Code, ForgeCode, Amazon Q, Open Interpreter, Cursor CLI, Warp) with:
|
||||
|
||||
- **Installation status** — Installed / Not Found with version detection
|
||||
- **Protocol badges** — stdio, HTTP, etc.
|
||||
- **Custom agents** — Register any CLI tool via form (name, binary, version command, spawn args)
|
||||
- **CLI Fingerprint Matching** — Per-provider toggle to match native CLI request signatures, reducing ban risk while preserving proxy IP
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Media _(v2.0.3+)_
|
||||
|
||||
Generate images, videos, and music from the dashboard. Supports OpenAI, xAI, Together, Hyperbolic, SD WebUI, ComfyUI, AnimateDiff, Stable Audio Open, and MusicGen.
|
||||
|
||||
---
|
||||
|
||||
## 📝 Request Logs
|
||||
|
||||
Real-time request logging with filtering by provider, model, account, and API key. Shows status codes, token usage, latency, and response details.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🌐 API Endpoint
|
||||
|
||||
Your unified API endpoint with capability breakdown: Chat Completions, Responses API, Embeddings, Image Generation, Reranking, Audio Transcription, Text-to-Speech, Moderations, and registered API keys. Cloud proxy support for remote access.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔑 API Key Management
|
||||
|
||||
Create, scope, and revoke API keys. Each key can be restricted to specific models/providers with full access or read-only permissions. Visual key management with usage tracking.
|
||||
|
||||
---
|
||||
|
||||
## 📋 Audit Log
|
||||
|
||||
Administrative action tracking with filtering by action type, actor, target, IP address, and timestamp. Full security event history.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application
|
||||
|
||||
Native Electron desktop app for Windows, macOS, and Linux. Run OmniRoute as a standalone application with system tray integration, offline support, auto-update, and one-click install.
|
||||
|
||||
Key features:
|
||||
|
||||
- Server readiness polling (no blank screen on cold start)
|
||||
- System tray with port management
|
||||
- Content Security Policy
|
||||
- Single-instance lock
|
||||
- Auto-update on restart
|
||||
- Platform-conditional UI (macOS traffic lights, Windows/Linux default titlebar)
|
||||
|
||||
📖 See [`electron/README.md`](../electron/README.md) for full documentation.
|
||||
@@ -0,0 +1,87 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/MCP-SERVER.md) · 🇪🇸 [es](../es/MCP-SERVER.md) · 🇫🇷 [fr](../fr/MCP-SERVER.md) · 🇩🇪 [de](../de/MCP-SERVER.md) · 🇮🇹 [it](../it/MCP-SERVER.md) · 🇷🇺 [ru](../ru/MCP-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/MCP-SERVER.md) · 🇯🇵 [ja](../ja/MCP-SERVER.md) · 🇰🇷 [ko](../ko/MCP-SERVER.md) · 🇸🇦 [ar](../ar/MCP-SERVER.md) · 🇮🇳 [in](../in/MCP-SERVER.md) · 🇹🇭 [th](../th/MCP-SERVER.md) · 🇻🇳 [vi](../vi/MCP-SERVER.md) · 🇮🇩 [id](../id/MCP-SERVER.md) · 🇲🇾 [ms](../ms/MCP-SERVER.md) · 🇳🇱 [nl](../nl/MCP-SERVER.md) · 🇵🇱 [pl](../pl/MCP-SERVER.md) · 🇸🇪 [sv](../sv/MCP-SERVER.md) · 🇳🇴 [no](../no/MCP-SERVER.md) · 🇩🇰 [da](../da/MCP-SERVER.md) · 🇫🇮 [fi](../fi/MCP-SERVER.md) · 🇵🇹 [pt](../pt/MCP-SERVER.md) · 🇷🇴 [ro](../ro/MCP-SERVER.md) · 🇭🇺 [hu](../hu/MCP-SERVER.md) · 🇧🇬 [bg](../bg/MCP-SERVER.md) · 🇸🇰 [sk](../sk/MCP-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/MCP-SERVER.md) · 🇮🇱 [he](../he/MCP-SERVER.md) · 🇵🇭 [phi](../phi/MCP-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute MCP Server Documentation
|
||||
|
||||
> Model Context Protocol server with 16 intelligent tools
|
||||
|
||||
## Installation
|
||||
|
||||
OmniRoute MCP is built-in. Start it with:
|
||||
|
||||
```bash
|
||||
omniroute --mcp
|
||||
```
|
||||
|
||||
Or via the open-sse transport:
|
||||
|
||||
```bash
|
||||
# HTTP streamable transport (port 20130)
|
||||
omniroute --dev # MCP auto-starts on /mcp endpoint
|
||||
```
|
||||
|
||||
## IDE Configuration
|
||||
|
||||
See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot, and Claude Desktop setup.
|
||||
|
||||
---
|
||||
|
||||
## Essential Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :------------------------------ | :--------------------------------------- |
|
||||
| `omniroute_get_health` | Gateway health, circuit breakers, uptime |
|
||||
| `omniroute_list_combos` | All configured combos with models |
|
||||
| `omniroute_get_combo_metrics` | Performance metrics for a specific combo |
|
||||
| `omniroute_switch_combo` | Switch active combo by ID/name |
|
||||
| `omniroute_check_quota` | Quota status per provider or all |
|
||||
| `omniroute_route_request` | Send a chat completion through OmniRoute |
|
||||
| `omniroute_cost_report` | Cost analytics for a time period |
|
||||
| `omniroute_list_models_catalog` | Full model catalog with capabilities |
|
||||
|
||||
## Advanced Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :--------------------------------- | :---------------------------------------------- |
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
| `omniroute_get_session_snapshot` | Full session state: costs, tokens, errors |
|
||||
|
||||
## Authentication
|
||||
|
||||
MCP tools are authenticated via API key scopes. Each tool requires specific scopes:
|
||||
|
||||
| Scope | Tools |
|
||||
| :------------- | :----------------------------------------------- |
|
||||
| `read:health` | get_health, get_provider_metrics |
|
||||
| `read:combos` | list_combos, get_combo_metrics |
|
||||
| `write:combos` | switch_combo |
|
||||
| `read:quota` | check_quota |
|
||||
| `write:route` | route_request, simulate_route, test_combo |
|
||||
| `read:usage` | cost_report, get_session_snapshot, explain_route |
|
||||
| `write:config` | set_budget_guard, set_resilience_profile |
|
||||
| `read:models` | list_models_catalog, best_combo_for_task |
|
||||
|
||||
## Audit Logging
|
||||
|
||||
Every tool call is logged to `mcp_tool_audit` with:
|
||||
|
||||
- Tool name, arguments, result
|
||||
- Duration (ms), success/failure
|
||||
- API key hash, timestamp
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------------ |
|
||||
| `open-sse/mcp-server/server.ts` | MCP server creation + 16 tool registrations |
|
||||
| `open-sse/mcp-server/transport.ts` | Stdio + HTTP transport |
|
||||
| `open-sse/mcp-server/auth.ts` | API key + scope validation |
|
||||
| `open-sse/mcp-server/audit.ts` | Tool call audit logging |
|
||||
| `open-sse/mcp-server/tools/advancedTools.ts` | 8 advanced tool handlers |
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,37 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/RELEASE_CHECKLIST.md) · 🇪🇸 [es](../es/RELEASE_CHECKLIST.md) · 🇫🇷 [fr](../fr/RELEASE_CHECKLIST.md) · 🇩🇪 [de](../de/RELEASE_CHECKLIST.md) · 🇮🇹 [it](../it/RELEASE_CHECKLIST.md) · 🇷🇺 [ru](../ru/RELEASE_CHECKLIST.md) · 🇨🇳 [zh-CN](../zh-CN/RELEASE_CHECKLIST.md) · 🇯🇵 [ja](../ja/RELEASE_CHECKLIST.md) · 🇰🇷 [ko](../ko/RELEASE_CHECKLIST.md) · 🇸🇦 [ar](../ar/RELEASE_CHECKLIST.md) · 🇮🇳 [in](../in/RELEASE_CHECKLIST.md) · 🇹🇭 [th](../th/RELEASE_CHECKLIST.md) · 🇻🇳 [vi](../vi/RELEASE_CHECKLIST.md) · 🇮🇩 [id](../id/RELEASE_CHECKLIST.md) · 🇲🇾 [ms](../ms/RELEASE_CHECKLIST.md) · 🇳🇱 [nl](../nl/RELEASE_CHECKLIST.md) · 🇵🇱 [pl](../pl/RELEASE_CHECKLIST.md) · 🇸🇪 [sv](../sv/RELEASE_CHECKLIST.md) · 🇳🇴 [no](../no/RELEASE_CHECKLIST.md) · 🇩🇰 [da](../da/RELEASE_CHECKLIST.md) · 🇫🇮 [fi](../fi/RELEASE_CHECKLIST.md) · 🇵🇹 [pt](../pt/RELEASE_CHECKLIST.md) · 🇷🇴 [ro](../ro/RELEASE_CHECKLIST.md) · 🇭🇺 [hu](../hu/RELEASE_CHECKLIST.md) · 🇧🇬 [bg](../bg/RELEASE_CHECKLIST.md) · 🇸🇰 [sk](../sk/RELEASE_CHECKLIST.md) · 🇺🇦 [uk-UA](../uk-UA/RELEASE_CHECKLIST.md) · 🇮🇱 [he](../he/RELEASE_CHECKLIST.md) · 🇵🇭 [phi](../phi/RELEASE_CHECKLIST.md)
|
||||
|
||||
---
|
||||
|
||||
# Release Checklist
|
||||
|
||||
Use this checklist before tagging or publishing a new OmniRoute release.
|
||||
|
||||
## Version and Changelog
|
||||
|
||||
1. Bump `package.json` version (`x.y.z`) in the release branch.
|
||||
2. Move release notes from `## [Unreleased]` in `CHANGELOG.md` to a dated section:
|
||||
- `## [x.y.z] — YYYY-MM-DD`
|
||||
3. Keep `## [Unreleased]` as the first changelog section for upcoming work.
|
||||
4. Ensure the latest semver section in `CHANGELOG.md` equals `package.json` version.
|
||||
|
||||
## API Docs
|
||||
|
||||
1. Update `docs/openapi.yaml`:
|
||||
- `info.version` must equal `package.json` version.
|
||||
2. Validate endpoint examples if API contracts changed.
|
||||
|
||||
## Runtime Docs
|
||||
|
||||
1. Review `docs/ARCHITECTURE.md` for storage/runtime drift.
|
||||
2. Review `docs/TROUBLESHOOTING.md` for env var and operational drift.
|
||||
3. Update localized docs if source docs changed significantly.
|
||||
|
||||
## Automated Check
|
||||
|
||||
Run the sync guard locally before opening PR:
|
||||
|
||||
```bash
|
||||
npm run check:docs-sync
|
||||
```
|
||||
|
||||
CI also runs this check in `.github/workflows/ci.yml` (lint job).
|
||||
@@ -0,0 +1,258 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/TROUBLESHOOTING.md) · 🇪🇸 [es](../es/TROUBLESHOOTING.md) · 🇫🇷 [fr](../fr/TROUBLESHOOTING.md) · 🇩🇪 [de](../de/TROUBLESHOOTING.md) · 🇮🇹 [it](../it/TROUBLESHOOTING.md) · 🇷🇺 [ru](../ru/TROUBLESHOOTING.md) · 🇨🇳 [zh-CN](../zh-CN/TROUBLESHOOTING.md) · 🇯🇵 [ja](../ja/TROUBLESHOOTING.md) · 🇰🇷 [ko](../ko/TROUBLESHOOTING.md) · 🇸🇦 [ar](../ar/TROUBLESHOOTING.md) · 🇮🇳 [in](../in/TROUBLESHOOTING.md) · 🇹🇭 [th](../th/TROUBLESHOOTING.md) · 🇻🇳 [vi](../vi/TROUBLESHOOTING.md) · 🇮🇩 [id](../id/TROUBLESHOOTING.md) · 🇲🇾 [ms](../ms/TROUBLESHOOTING.md) · 🇳🇱 [nl](../nl/TROUBLESHOOTING.md) · 🇵🇱 [pl](../pl/TROUBLESHOOTING.md) · 🇸🇪 [sv](../sv/TROUBLESHOOTING.md) · 🇳🇴 [no](../no/TROUBLESHOOTING.md) · 🇩🇰 [da](../da/TROUBLESHOOTING.md) · 🇫🇮 [fi](../fi/TROUBLESHOOTING.md) · 🇵🇹 [pt](../pt/TROUBLESHOOTING.md) · 🇷🇴 [ro](../ro/TROUBLESHOOTING.md) · 🇭🇺 [hu](../hu/TROUBLESHOOTING.md) · 🇧🇬 [bg](../bg/TROUBLESHOOTING.md) · 🇸🇰 [sk](../sk/TROUBLESHOOTING.md) · 🇺🇦 [uk-UA](../uk-UA/TROUBLESHOOTING.md) · 🇮🇱 [he](../he/TROUBLESHOOTING.md) · 🇵🇭 [phi](../phi/TROUBLESHOOTING.md)
|
||||
|
||||
---
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](TROUBLESHOOTING.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/TROUBLESHOOTING.md) | 🇪🇸 [Español](i18n/es/TROUBLESHOOTING.md) | 🇫🇷 [Français](i18n/fr/TROUBLESHOOTING.md) | 🇮🇹 [Italiano](i18n/it/TROUBLESHOOTING.md) | 🇷🇺 [Русский](i18n/ru/TROUBLESHOOTING.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/TROUBLESHOOTING.md) | 🇩🇪 [Deutsch](i18n/de/TROUBLESHOOTING.md) | 🇮🇳 [हिन्दी](i18n/in/TROUBLESHOOTING.md) | 🇹🇭 [ไทย](i18n/th/TROUBLESHOOTING.md) | 🇺🇦 [Українська](i18n/uk-UA/TROUBLESHOOTING.md) | 🇸🇦 [العربية](i18n/ar/TROUBLESHOOTING.md) | 🇯🇵 [日本語](i18n/ja/TROUBLESHOOTING.md) | 🇻🇳 [Tiếng Việt](i18n/vi/TROUBLESHOOTING.md) | 🇧🇬 [Български](i18n/bg/TROUBLESHOOTING.md) | 🇩🇰 [Dansk](i18n/da/TROUBLESHOOTING.md) | 🇫🇮 [Suomi](i18n/fi/TROUBLESHOOTING.md) | 🇮🇱 [עברית](i18n/he/TROUBLESHOOTING.md) | 🇭🇺 [Magyar](i18n/hu/TROUBLESHOOTING.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/TROUBLESHOOTING.md) | 🇰🇷 [한국어](i18n/ko/TROUBLESHOOTING.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/TROUBLESHOOTING.md) | 🇳🇱 [Nederlands](i18n/nl/TROUBLESHOOTING.md) | 🇳🇴 [Norsk](i18n/no/TROUBLESHOOTING.md) | 🇵🇹 [Português (Portugal)](i18n/pt/TROUBLESHOOTING.md) | 🇷🇴 [Română](i18n/ro/TROUBLESHOOTING.md) | 🇵🇱 [Polski](i18n/pl/TROUBLESHOOTING.md) | 🇸🇰 [Slovenčina](i18n/sk/TROUBLESHOOTING.md) | 🇸🇪 [Svenska](i18n/sv/TROUBLESHOOTING.md) | 🇵🇭 [Filipino](i18n/phi/TROUBLESHOOTING.md)
|
||||
|
||||
Common problems and solutions for OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Quick Fixes
|
||||
|
||||
| Problem | Solution |
|
||||
| ----------------------------- | ------------------------------------------------------------------ |
|
||||
| First login not working | Set `INITIAL_PASSWORD` in `.env` (no hardcoded default) |
|
||||
| Dashboard opens on wrong port | Set `PORT=20128` and `NEXT_PUBLIC_BASE_URL=http://localhost:20128` |
|
||||
| No request logs under `logs/` | Set `ENABLE_REQUEST_LOGS=true` |
|
||||
| EACCES: permission denied | Set `DATA_DIR=/path/to/writable/dir` to override `~/.omniroute` |
|
||||
| Routing strategy not saving | Update to v1.4.11+ (Zod schema fix for settings persistence) |
|
||||
|
||||
---
|
||||
|
||||
## Provider Issues
|
||||
|
||||
### "Language model did not provide messages"
|
||||
|
||||
**Cause:** Provider quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Check dashboard quota tracker
|
||||
2. Use a combo with fallback tiers
|
||||
3. Switch to cheaper/free tier
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
**Cause:** Subscription quota exhausted.
|
||||
|
||||
**Fix:**
|
||||
|
||||
- Add fallback: `cc/claude-opus-4-6 → glm/glm-4.7 → if/kimi-k2-thinking`
|
||||
- Use GLM/MiniMax as cheap backup
|
||||
|
||||
### OAuth Token Expired
|
||||
|
||||
OmniRoute auto-refreshes tokens. If issues persist:
|
||||
|
||||
1. Dashboard → Provider → Reconnect
|
||||
2. Delete and re-add the provider connection
|
||||
|
||||
---
|
||||
|
||||
## Cloud Issues
|
||||
|
||||
### Cloud Sync Errors
|
||||
|
||||
1. Verify `BASE_URL` points to your running instance (e.g., `http://localhost:20128`)
|
||||
2. Verify `CLOUD_URL` points to your cloud endpoint (e.g., `https://omniroute.dev`)
|
||||
3. Keep `NEXT_PUBLIC_*` values aligned with server-side values
|
||||
|
||||
### Cloud `stream=false` Returns 500
|
||||
|
||||
**Symptom:** `Unexpected token 'd'...` on cloud endpoint for non-streaming calls.
|
||||
|
||||
**Cause:** Upstream returns SSE payload while client expects JSON.
|
||||
|
||||
**Workaround:** Use `stream=true` for cloud direct calls. Local runtime includes SSE→JSON fallback.
|
||||
|
||||
### Cloud Says Connected but "Invalid API key"
|
||||
|
||||
1. Create a fresh key from local dashboard (`/api/keys`)
|
||||
2. Run cloud sync: Enable Cloud → Sync Now
|
||||
3. Old/non-synced keys can still return `401` on cloud
|
||||
|
||||
---
|
||||
|
||||
## Docker Issues
|
||||
|
||||
### CLI Tool Shows Not Installed
|
||||
|
||||
1. Check runtime fields: `curl http://localhost:20128/api/cli-tools/runtime/codex | jq`
|
||||
2. For portable mode: use image target `runner-cli` (bundled CLIs)
|
||||
3. For host mount mode: set `CLI_EXTRA_PATHS` and mount host bin directory as read-only
|
||||
4. If `installed=true` and `runnable=false`: binary was found but failed healthcheck
|
||||
|
||||
### Quick Runtime Validation
|
||||
|
||||
```bash
|
||||
curl -s http://localhost:20128/api/cli-tools/codex-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/claude-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
curl -s http://localhost:20128/api/cli-tools/openclaw-settings | jq '{installed,runnable,commandPath,runtimeMode,reason}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cost Issues
|
||||
|
||||
### High Costs
|
||||
|
||||
1. Check usage stats in Dashboard → Usage
|
||||
2. Switch primary model to GLM/MiniMax
|
||||
3. Use free tier (Gemini CLI, iFlow) for non-critical tasks
|
||||
4. Set cost budgets per API key: Dashboard → API Keys → Budget
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
### Enable Request Logs
|
||||
|
||||
Set `ENABLE_REQUEST_LOGS=true` in your `.env` file. Logs appear under `logs/` directory.
|
||||
|
||||
### Check Provider Health
|
||||
|
||||
```bash
|
||||
# Health dashboard
|
||||
http://localhost:20128/dashboard/health
|
||||
|
||||
# API health check
|
||||
curl http://localhost:20128/api/monitoring/health
|
||||
```
|
||||
|
||||
### Runtime Storage
|
||||
|
||||
- Main state: `${DATA_DIR}/storage.sqlite` (providers, combos, aliases, keys, settings)
|
||||
- Usage: SQLite tables in `storage.sqlite` (`usage_history`, `call_logs`, `proxy_logs`) + optional `${DATA_DIR}/log.txt` and `${DATA_DIR}/call_logs/`
|
||||
- Request logs: `<repo>/logs/...` (when `ENABLE_REQUEST_LOGS=true`)
|
||||
|
||||
---
|
||||
|
||||
## Circuit Breaker Issues
|
||||
|
||||
### Provider stuck in OPEN state
|
||||
|
||||
When a provider's circuit breaker is OPEN, requests are blocked until the cooldown expires.
|
||||
|
||||
**Fix:**
|
||||
|
||||
1. Go to **Dashboard → Settings → Resilience**
|
||||
2. Check the circuit breaker card for the affected provider
|
||||
3. Click **Reset All** to clear all breakers, or wait for the cooldown to expire
|
||||
4. Verify the provider is actually available before resetting
|
||||
|
||||
### Provider keeps tripping the circuit breaker
|
||||
|
||||
If a provider repeatedly enters OPEN state:
|
||||
|
||||
1. Check **Dashboard → Health → Provider Health** for the failure pattern
|
||||
2. Go to **Settings → Resilience → Provider Profiles** and increase the failure threshold
|
||||
3. Check if the provider has changed API limits or requires re-authentication
|
||||
4. Review latency telemetry — high latency may cause timeout-based failures
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription Issues
|
||||
|
||||
### "Unsupported model" error
|
||||
|
||||
- Ensure you're using the correct prefix: `deepgram/nova-3` or `assemblyai/best`
|
||||
- Verify the provider is connected in **Dashboard → Providers**
|
||||
|
||||
### Transcription returns empty or fails
|
||||
|
||||
- Check supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`
|
||||
- Verify file size is within provider limits (typically < 25MB)
|
||||
- Check provider API key validity in the provider card
|
||||
|
||||
---
|
||||
|
||||
## Translator Debugging
|
||||
|
||||
Use **Dashboard → Translator** to debug format translation issues:
|
||||
|
||||
| Mode | When to Use |
|
||||
| ---------------- | -------------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Compare input/output formats side by side — paste a failing request to see how it translates |
|
||||
| **Chat Tester** | Send live messages and inspect the full request/response payload including headers |
|
||||
| **Test Bench** | Run batch tests across format combinations to find which translations are broken |
|
||||
| **Live Monitor** | Watch real-time request flow to catch intermittent translation issues |
|
||||
|
||||
### Common format issues
|
||||
|
||||
- **Thinking tags not appearing** — Check if the target provider supports thinking and the thinking budget setting
|
||||
- **Tool calls dropping** — Some format translations may strip unsupported fields; verify in Playground mode
|
||||
- **System prompt missing** — Claude and Gemini handle system prompts differently; check translation output
|
||||
- **SDK returns raw string instead of object** — Fixed in v1.1.0: response sanitizer now strips non-standard fields (`x_groq`, `usage_breakdown`, etc.) that cause OpenAI SDK Pydantic validation failures
|
||||
- **GLM/ERNIE rejects `system` role** — Fixed in v1.1.0: role normalizer automatically merges system messages into user messages for incompatible models
|
||||
- **`developer` role not recognized** — Fixed in v1.1.0: automatically converted to `system` for non-OpenAI providers
|
||||
- **`json_schema` not working with Gemini** — Fixed in v1.1.0: `response_format` is now converted to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
---
|
||||
|
||||
## Resilience Settings
|
||||
|
||||
### Auto rate-limit not triggering
|
||||
|
||||
- Auto rate-limit only applies to API key providers (not OAuth/subscription)
|
||||
- Verify **Settings → Resilience → Provider Profiles** has auto-rate-limit enabled
|
||||
- Check if the provider returns `429` status codes or `Retry-After` headers
|
||||
|
||||
### Tuning exponential backoff
|
||||
|
||||
Provider profiles support these settings:
|
||||
|
||||
- **Base delay** — Initial wait time after first failure (default: 1s)
|
||||
- **Max delay** — Maximum wait time cap (default: 30s)
|
||||
- **Multiplier** — How much to increase delay per consecutive failure (default: 2x)
|
||||
|
||||
### Anti-thundering herd
|
||||
|
||||
When many concurrent requests hit a rate-limited provider, OmniRoute uses mutex + auto rate-limiting to serialize requests and prevent cascading failures. This is automatic for API key providers.
|
||||
|
||||
---
|
||||
|
||||
## Optional RAG / LLM failure taxonomy (16 problems)
|
||||
|
||||
Some OmniRoute users place the gateway in front of RAG or agent stacks. In those setups it is common to see a strange pattern: OmniRoute looks healthy (providers up, routing profiles ok, no rate limit alerts) but the final answer is still wrong.
|
||||
|
||||
In practice these incidents usually come from the downstream RAG pipeline, not from the gateway itself.
|
||||
|
||||
If you want a shared vocabulary to describe those failures you can use the WFGY ProblemMap, an external MIT license text resource that defines sixteen recurring RAG / LLM failure patterns. At a high level it covers:
|
||||
|
||||
- retrieval drift and broken context boundaries
|
||||
- empty or stale indexes and vector stores
|
||||
- embedding versus semantic mismatch
|
||||
- prompt assembly and context window issues
|
||||
- logic collapse and overconfident answers
|
||||
- long chain and agent coordination failures
|
||||
- multi agent memory and role drift
|
||||
- deployment and bootstrap ordering problems
|
||||
|
||||
The idea is simple:
|
||||
|
||||
1. When you investigate a bad response, capture:
|
||||
- user task and request
|
||||
- route or provider combo in OmniRoute
|
||||
- any RAG context used downstream (retrieved documents, tool calls, etc)
|
||||
2. Map the incident to one or two WFGY ProblemMap numbers (`No.1` … `No.16`).
|
||||
3. Store the number in your own dashboard, runbook, or incident tracker next to the OmniRoute logs.
|
||||
4. Use the corresponding WFGY page to decide whether you need to change your RAG stack, retriever, or routing strategy.
|
||||
|
||||
Full text and concrete recipes live here (MIT license, text only):
|
||||
|
||||
[WFGY ProblemMap README](https://github.com/onestardao/WFGY/blob/main/ProblemMap/README.md)
|
||||
|
||||
You can ignore this section if you do not run RAG or agent pipelines behind OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Still Stuck?
|
||||
|
||||
- **GitHub Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **Architecture**: See [`docs/ARCHITECTURE.md`](ARCHITECTURE.md) for internal details
|
||||
- **API Reference**: See [`docs/API_REFERENCE.md`](API_REFERENCE.md) for all endpoints
|
||||
- **Health Dashboard**: Check **Dashboard → Health** for real-time system status
|
||||
- **Translator**: Use **Dashboard → Translator** to debug format issues
|
||||
@@ -0,0 +1,813 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/USER_GUIDE.md) · 🇪🇸 [es](../es/USER_GUIDE.md) · 🇫🇷 [fr](../fr/USER_GUIDE.md) · 🇩🇪 [de](../de/USER_GUIDE.md) · 🇮🇹 [it](../it/USER_GUIDE.md) · 🇷🇺 [ru](../ru/USER_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/USER_GUIDE.md) · 🇯🇵 [ja](../ja/USER_GUIDE.md) · 🇰🇷 [ko](../ko/USER_GUIDE.md) · 🇸🇦 [ar](../ar/USER_GUIDE.md) · 🇮🇳 [in](../in/USER_GUIDE.md) · 🇹🇭 [th](../th/USER_GUIDE.md) · 🇻🇳 [vi](../vi/USER_GUIDE.md) · 🇮🇩 [id](../id/USER_GUIDE.md) · 🇲🇾 [ms](../ms/USER_GUIDE.md) · 🇳🇱 [nl](../nl/USER_GUIDE.md) · 🇵🇱 [pl](../pl/USER_GUIDE.md) · 🇸🇪 [sv](../sv/USER_GUIDE.md) · 🇳🇴 [no](../no/USER_GUIDE.md) · 🇩🇰 [da](../da/USER_GUIDE.md) · 🇫🇮 [fi](../fi/USER_GUIDE.md) · 🇵🇹 [pt](../pt/USER_GUIDE.md) · 🇷🇴 [ro](../ro/USER_GUIDE.md) · 🇭🇺 [hu](../hu/USER_GUIDE.md) · 🇧🇬 [bg](../bg/USER_GUIDE.md) · 🇸🇰 [sk](../sk/USER_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/USER_GUIDE.md) · 🇮🇱 [he](../he/USER_GUIDE.md) · 🇵🇭 [phi](../phi/USER_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# User Guide
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](USER_GUIDE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/USER_GUIDE.md) | 🇪🇸 [Español](i18n/es/USER_GUIDE.md) | 🇫🇷 [Français](i18n/fr/USER_GUIDE.md) | 🇮🇹 [Italiano](i18n/it/USER_GUIDE.md) | 🇷🇺 [Русский](i18n/ru/USER_GUIDE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/USER_GUIDE.md) | 🇩🇪 [Deutsch](i18n/de/USER_GUIDE.md) | 🇮🇳 [हिन्दी](i18n/in/USER_GUIDE.md) | 🇹🇭 [ไทย](i18n/th/USER_GUIDE.md) | 🇺🇦 [Українська](i18n/uk-UA/USER_GUIDE.md) | 🇸🇦 [العربية](i18n/ar/USER_GUIDE.md) | 🇯🇵 [日本語](i18n/ja/USER_GUIDE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/USER_GUIDE.md) | 🇧🇬 [Български](i18n/bg/USER_GUIDE.md) | 🇩🇰 [Dansk](i18n/da/USER_GUIDE.md) | 🇫🇮 [Suomi](i18n/fi/USER_GUIDE.md) | 🇮🇱 [עברית](i18n/he/USER_GUIDE.md) | 🇭🇺 [Magyar](i18n/hu/USER_GUIDE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/USER_GUIDE.md) | 🇰🇷 [한국어](i18n/ko/USER_GUIDE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/USER_GUIDE.md) | 🇳🇱 [Nederlands](i18n/nl/USER_GUIDE.md) | 🇳🇴 [Norsk](i18n/no/USER_GUIDE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/USER_GUIDE.md) | 🇷🇴 [Română](i18n/ro/USER_GUIDE.md) | 🇵🇱 [Polski](i18n/pl/USER_GUIDE.md) | 🇸🇰 [Slovenčina](i18n/sk/USER_GUIDE.md) | 🇸🇪 [Svenska](i18n/sv/USER_GUIDE.md) | 🇵🇭 [Filipino](i18n/phi/USER_GUIDE.md)
|
||||
|
||||
Complete guide for configuring providers, creating combos, integrating CLI tools, and deploying OmniRoute.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Pricing at a Glance](#-pricing-at-a-glance)
|
||||
- [Use Cases](#-use-cases)
|
||||
- [Provider Setup](#-provider-setup)
|
||||
- [CLI Integration](#-cli-integration)
|
||||
- [Deployment](#-deployment)
|
||||
- [Available Models](#-available-models)
|
||||
- [Advanced Features](#-advanced-features)
|
||||
|
||||
---
|
||||
|
||||
## 💰 Pricing at a Glance
|
||||
|
||||
| Tier | Provider | Cost | Quota Reset | Best For |
|
||||
| ------------------- | ----------------- | ----------- | ---------------- | -------------------- |
|
||||
| **💳 SUBSCRIPTION** | Claude Code (Pro) | $20/mo | 5h + weekly | Already subscribed |
|
||||
| | Codex (Plus/Pro) | $20-200/mo | 5h + weekly | OpenAI users |
|
||||
| | Gemini CLI | **FREE** | 180K/mo + 1K/day | Everyone! |
|
||||
| | GitHub Copilot | $10-19/mo | Monthly | GitHub users |
|
||||
| **🔑 API KEY** | DeepSeek | Pay per use | None | Cheap reasoning |
|
||||
| | Groq | Pay per use | None | Ultra-fast inference |
|
||||
| | xAI (Grok) | Pay per use | None | Grok 4 reasoning |
|
||||
| | Mistral | Pay per use | None | EU-hosted models |
|
||||
| | Perplexity | Pay per use | None | Search-augmented |
|
||||
| | Together AI | Pay per use | None | Open-source models |
|
||||
| | Fireworks AI | Pay per use | None | Fast FLUX images |
|
||||
| | Cerebras | Pay per use | None | Wafer-scale speed |
|
||||
| | Cohere | Pay per use | None | Command R+ RAG |
|
||||
| | NVIDIA NIM | Pay per use | None | Enterprise models |
|
||||
| **💰 CHEAP** | GLM-4.7 | $0.6/1M | Daily 10AM | Budget backup |
|
||||
| | MiniMax M2.1 | $0.2/1M | 5-hour rolling | Cheapest option |
|
||||
| | Kimi K2 | $9/mo flat | 10M tokens/mo | Predictable cost |
|
||||
| **🆓 FREE** | iFlow | $0 | Unlimited | 8 models free |
|
||||
| | Qwen | $0 | Unlimited | 3 models free |
|
||||
| | Kiro | $0 | Unlimited | Claude free |
|
||||
|
||||
**💡 Pro Tip:** Start with Gemini CLI (180K free/month) + iFlow (unlimited free) combo = $0 cost!
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Use Cases
|
||||
|
||||
### Case 1: "I have Claude Pro subscription"
|
||||
|
||||
**Problem:** Quota expires unused, rate limits during heavy coding
|
||||
|
||||
```
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (use subscription fully)
|
||||
2. glm/glm-4.7 (cheap backup when quota out)
|
||||
3. if/kimi-k2-thinking (free emergency fallback)
|
||||
|
||||
Monthly cost: $20 (subscription) + ~$5 (backup) = $25 total
|
||||
vs. $20 + hitting limits = frustration
|
||||
```
|
||||
|
||||
### Case 2: "I want zero cost"
|
||||
|
||||
**Problem:** Can't afford subscriptions, need reliable AI coding
|
||||
|
||||
```
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited free)
|
||||
3. qw/qwen3-coder-plus (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Quality: Production-ready models
|
||||
```
|
||||
|
||||
### Case 3: "I need 24/7 coding, no interruptions"
|
||||
|
||||
**Problem:** Deadlines, can't afford downtime
|
||||
|
||||
```
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6 (best quality)
|
||||
2. cx/gpt-5.2-codex (second subscription)
|
||||
3. glm/glm-4.7 (cheap, resets daily)
|
||||
4. minimax/MiniMax-M2.1 (cheapest, 5h reset)
|
||||
5. if/kimi-k2-thinking (free unlimited)
|
||||
|
||||
Result: 5 layers of fallback = zero downtime
|
||||
Monthly cost: $20-200 (subscriptions) + $10-20 (backup)
|
||||
```
|
||||
|
||||
### Case 4: "I want FREE AI in OpenClaw"
|
||||
|
||||
**Problem:** Need AI assistant in messaging apps, completely free
|
||||
|
||||
```
|
||||
Combo: "openclaw-free"
|
||||
1. if/glm-4.7 (unlimited free)
|
||||
2. if/minimax-m2.1 (unlimited free)
|
||||
3. if/kimi-k2-thinking (unlimited free)
|
||||
|
||||
Monthly cost: $0
|
||||
Access via: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📖 Provider Setup
|
||||
|
||||
### 🔐 Subscription Providers
|
||||
|
||||
#### Claude Code (Pro/Max)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Claude Code
|
||||
→ OAuth login → Auto token refresh
|
||||
→ 5-hour + weekly quota tracking
|
||||
|
||||
Models:
|
||||
cc/claude-opus-4-6
|
||||
cc/claude-sonnet-4-5-20250929
|
||||
cc/claude-haiku-4-5-20251001
|
||||
```
|
||||
|
||||
**Pro Tip:** Use Opus for complex tasks, Sonnet for speed. OmniRoute tracks quota per model!
|
||||
|
||||
#### OpenAI Codex (Plus/Pro)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Codex
|
||||
→ OAuth login (port 1455)
|
||||
→ 5-hour + weekly reset
|
||||
|
||||
Models:
|
||||
cx/gpt-5.2-codex
|
||||
cx/gpt-5.1-codex-max
|
||||
```
|
||||
|
||||
#### Gemini CLI (FREE 180K/month!)
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect Gemini CLI
|
||||
→ Google OAuth
|
||||
→ 180K completions/month + 1K/day
|
||||
|
||||
Models:
|
||||
gc/gemini-3-flash-preview
|
||||
gc/gemini-2.5-pro
|
||||
```
|
||||
|
||||
**Best Value:** Huge free tier! Use this before paid tiers.
|
||||
|
||||
#### GitHub Copilot
|
||||
|
||||
```bash
|
||||
Dashboard → Providers → Connect GitHub
|
||||
→ OAuth via GitHub
|
||||
→ Monthly reset (1st of month)
|
||||
|
||||
Models:
|
||||
gh/gpt-5
|
||||
gh/claude-4.5-sonnet
|
||||
gh/gemini-3-pro
|
||||
```
|
||||
|
||||
### 💰 Cheap Providers
|
||||
|
||||
#### GLM-4.7 (Daily reset, $0.6/1M)
|
||||
|
||||
1. Sign up: [Zhipu AI](https://open.bigmodel.cn/)
|
||||
2. Get API key from Coding Plan
|
||||
3. Dashboard → Add API Key: Provider: `glm`, API Key: `your-key`
|
||||
|
||||
**Use:** `glm/glm-4.7` — **Pro Tip:** Coding Plan offers 3× quota at 1/7 cost! Reset daily 10:00 AM.
|
||||
|
||||
#### MiniMax M2.1 (5h reset, $0.20/1M)
|
||||
|
||||
1. Sign up: [MiniMax](https://www.minimax.io/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `minimax/MiniMax-M2.1` — **Pro Tip:** Cheapest option for long context (1M tokens)!
|
||||
|
||||
#### Kimi K2 ($9/month flat)
|
||||
|
||||
1. Subscribe: [Moonshot AI](https://platform.moonshot.ai/)
|
||||
2. Get API key → Dashboard → Add API Key
|
||||
|
||||
**Use:** `kimi/kimi-latest` — **Pro Tip:** Fixed $9/month for 10M tokens = $0.90/1M effective cost!
|
||||
|
||||
### 🆓 FREE Providers
|
||||
|
||||
#### iFlow (8 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect iFlow → OAuth login → Unlimited usage
|
||||
|
||||
Models: if/kimi-k2-thinking, if/qwen3-coder-plus, if/glm-4.7, if/minimax-m2, if/deepseek-r1
|
||||
```
|
||||
|
||||
#### Qwen (3 FREE models)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Qwen → Device code auth → Unlimited usage
|
||||
|
||||
Models: qw/qwen3-coder-plus, qw/qwen3-coder-flash
|
||||
```
|
||||
|
||||
#### Kiro (Claude FREE)
|
||||
|
||||
```bash
|
||||
Dashboard → Connect Kiro → AWS Builder ID or Google/GitHub → Unlimited
|
||||
|
||||
Models: kr/claude-sonnet-4.5, kr/claude-haiku-4.5
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
### Example 1: Maximize Subscription → Cheap Backup
|
||||
|
||||
```
|
||||
Dashboard → Combos → Create New
|
||||
|
||||
Name: premium-coding
|
||||
Models:
|
||||
1. cc/claude-opus-4-6 (Subscription primary)
|
||||
2. glm/glm-4.7 (Cheap backup, $0.6/1M)
|
||||
3. minimax/MiniMax-M2.1 (Cheapest fallback, $0.20/1M)
|
||||
|
||||
Use in CLI: premium-coding
|
||||
```
|
||||
|
||||
### Example 2: Free-Only (Zero Cost)
|
||||
|
||||
```
|
||||
Name: free-combo
|
||||
Models:
|
||||
1. gc/gemini-3-flash-preview (180K free/month)
|
||||
2. if/kimi-k2-thinking (unlimited)
|
||||
3. qw/qwen3-coder-plus (unlimited)
|
||||
|
||||
Cost: $0 forever!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Integration
|
||||
|
||||
### Cursor IDE
|
||||
|
||||
```
|
||||
Settings → Models → Advanced:
|
||||
OpenAI API Base URL: http://localhost:20128/v1
|
||||
OpenAI API Key: [from omniroute dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
Edit `~/.claude/config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"anthropic_api_base": "http://localhost:20128/v1",
|
||||
"anthropic_api_key": "your-omniroute-api-key"
|
||||
}
|
||||
```
|
||||
|
||||
### Codex CLI
|
||||
|
||||
```bash
|
||||
export OPENAI_BASE_URL="http://localhost:20128"
|
||||
export OPENAI_API_KEY="your-omniroute-api-key"
|
||||
codex "your prompt"
|
||||
```
|
||||
|
||||
### OpenClaw
|
||||
|
||||
Edit `~/.openclaw/openclaw.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": { "primary": "omniroute/if/glm-4.7" }
|
||||
}
|
||||
},
|
||||
"models": {
|
||||
"providers": {
|
||||
"omniroute": {
|
||||
"baseUrl": "http://localhost:20128/v1",
|
||||
"apiKey": "your-omniroute-api-key",
|
||||
"api": "openai-completions",
|
||||
"models": [{ "id": "if/glm-4.7", "name": "glm-4.7" }]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Or use Dashboard:** CLI Tools → OpenClaw → Auto-config
|
||||
|
||||
### Cline / Continue / RooCode
|
||||
|
||||
```
|
||||
Provider: OpenAI Compatible
|
||||
Base URL: http://localhost:20128/v1
|
||||
API Key: [from dashboard]
|
||||
Model: cc/claude-opus-4-6
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Deployment
|
||||
|
||||
### Global npm install (Recommended)
|
||||
|
||||
```bash
|
||||
npm install -g omniroute
|
||||
|
||||
# Create config directory
|
||||
mkdir -p ~/.omniroute
|
||||
|
||||
# Create .env file (see .env.example)
|
||||
cp .env.example ~/.omniroute/.env
|
||||
|
||||
# Start server
|
||||
omniroute
|
||||
# Or with custom port:
|
||||
omniroute --port 3000
|
||||
```
|
||||
|
||||
The CLI automatically loads `.env` from `~/.omniroute/.env` or `./.env`.
|
||||
|
||||
### VPS Deployment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/diegosouzapw/OmniRoute.git
|
||||
cd OmniRoute && npm install && npm run build
|
||||
|
||||
export JWT_SECRET="your-secure-secret-change-this"
|
||||
export INITIAL_PASSWORD="your-password"
|
||||
export DATA_DIR="/var/lib/omniroute"
|
||||
export PORT="20128"
|
||||
export HOSTNAME="0.0.0.0"
|
||||
export NODE_ENV="production"
|
||||
export NEXT_PUBLIC_BASE_URL="http://localhost:20128"
|
||||
export API_KEY_SECRET="endpoint-proxy-api-key-secret"
|
||||
|
||||
npm run start
|
||||
# Or: pm2 start npm --name omniroute -- start
|
||||
```
|
||||
|
||||
### PM2 Deployment (Low Memory)
|
||||
|
||||
For servers with limited RAM, use the memory limit option:
|
||||
|
||||
```bash
|
||||
# With 512MB limit (default)
|
||||
pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or with custom memory limit
|
||||
OMNIROUTE_MEMORY_MB=512 pm2 start npm --name omniroute -- start
|
||||
|
||||
# Or using ecosystem.config.js
|
||||
pm2 start ecosystem.config.js
|
||||
```
|
||||
|
||||
Create `ecosystem.config.js`:
|
||||
|
||||
```javascript
|
||||
module.exports = {
|
||||
apps: [
|
||||
{
|
||||
name: "omniroute",
|
||||
script: "npm",
|
||||
args: "start",
|
||||
env: {
|
||||
NODE_ENV: "production",
|
||||
OMNIROUTE_MEMORY_MB: "512",
|
||||
JWT_SECRET: "your-secret",
|
||||
INITIAL_PASSWORD: "your-password",
|
||||
},
|
||||
node_args: "--max-old-space-size=512",
|
||||
max_memory_restart: "300M",
|
||||
},
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
# Build image (default = runner-cli with codex/claude/droid preinstalled)
|
||||
docker build -t omniroute:cli .
|
||||
|
||||
# Portable mode (recommended)
|
||||
docker run -d --name omniroute -p 20128:20128 --env-file ./.env -v omniroute-data:/app/data omniroute:cli
|
||||
```
|
||||
|
||||
For host-integrated mode with CLI binaries, see the Docker section in the main docs.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| ------------------------- | ------------------------------------ | ------------------------------------------------------- |
|
||||
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
|
||||
| `INITIAL_PASSWORD` | `123456` | First login password |
|
||||
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
|
||||
| `PORT` | framework default | Service port (`20128` in examples) |
|
||||
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
|
||||
| `NODE_ENV` | runtime default | Set `production` for deploy |
|
||||
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
|
||||
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
|
||||
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
|
||||
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
|
||||
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
|
||||
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit in MB |
|
||||
| `PROMPT_CACHE_MAX_SIZE` | `50` | Max prompt cache entries |
|
||||
| `SEMANTIC_CACHE_MAX_SIZE` | `100` | Max semantic cache entries |
|
||||
|
||||
For the full environment variable reference, see the [README](../README.md).
|
||||
|
||||
---
|
||||
|
||||
## 📊 Available Models
|
||||
|
||||
<details>
|
||||
<summary><b>View all available models</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** — Pro/Max: `cc/claude-opus-4-6`, `cc/claude-sonnet-4-5-20250929`, `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** — Plus/Pro: `cx/gpt-5.2-codex`, `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** — FREE: `gc/gemini-3-flash-preview`, `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)**: `gh/gpt-5`, `gh/claude-4.5-sonnet`
|
||||
|
||||
**GLM (`glm/`)** — $0.6/1M: `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** — $0.2/1M: `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** — FREE: `if/kimi-k2-thinking`, `if/qwen3-coder-plus`, `if/deepseek-r1`
|
||||
|
||||
**Qwen (`qw/`)** — FREE: `qw/qwen3-coder-plus`, `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** — FREE: `kr/claude-sonnet-4.5`, `kr/claude-haiku-4.5`
|
||||
|
||||
**DeepSeek (`ds/`)**: `ds/deepseek-chat`, `ds/deepseek-reasoner`
|
||||
|
||||
**Groq (`groq/`)**: `groq/llama-3.3-70b-versatile`, `groq/llama-4-maverick-17b-128e-instruct`
|
||||
|
||||
**xAI (`xai/`)**: `xai/grok-4`, `xai/grok-4-0709-fast-reasoning`, `xai/grok-code-mini`
|
||||
|
||||
**Mistral (`mistral/`)**: `mistral/mistral-large-2501`, `mistral/codestral-2501`
|
||||
|
||||
**Perplexity (`pplx/`)**: `pplx/sonar-pro`, `pplx/sonar`
|
||||
|
||||
**Together AI (`together/`)**: `together/meta-llama/Llama-3.3-70B-Instruct-Turbo`
|
||||
|
||||
**Fireworks AI (`fireworks/`)**: `fireworks/accounts/fireworks/models/deepseek-v3p1`
|
||||
|
||||
**Cerebras (`cerebras/`)**: `cerebras/llama-3.3-70b`
|
||||
|
||||
**Cohere (`cohere/`)**: `cohere/command-r-plus-08-2024`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)**: `nvidia/nvidia/llama-3.3-70b-instruct`
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Advanced Features
|
||||
|
||||
### Custom Models
|
||||
|
||||
Add any model ID to any provider without waiting for an app update:
|
||||
|
||||
```bash
|
||||
# Via API
|
||||
curl -X POST http://localhost:20128/api/provider-models \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"provider": "openai", "modelId": "gpt-4.5-preview", "modelName": "GPT-4.5 Preview"}'
|
||||
|
||||
# List: curl http://localhost:20128/api/provider-models?provider=openai
|
||||
# Remove: curl -X DELETE "http://localhost:20128/api/provider-models?provider=openai&model=gpt-4.5-preview"
|
||||
```
|
||||
|
||||
Or use Dashboard: **Providers → [Provider] → Custom Models**.
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
Route requests directly to a specific provider with model validation:
|
||||
|
||||
```bash
|
||||
POST http://localhost:20128/v1/providers/openai/chat/completions
|
||||
POST http://localhost:20128/v1/providers/openai/embeddings
|
||||
POST http://localhost:20128/v1/providers/fireworks/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
### Network Proxy Configuration
|
||||
|
||||
```bash
|
||||
# Set global proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"global": {"type":"http","host":"proxy.example.com","port":"8080"}}'
|
||||
|
||||
# Per-provider proxy
|
||||
curl -X PUT http://localhost:20128/api/settings/proxy \
|
||||
-d '{"providers": {"openai": {"type":"socks5","host":"proxy.example.com","port":"1080"}}}'
|
||||
|
||||
# Test proxy
|
||||
curl -X POST http://localhost:20128/api/settings/proxy/test \
|
||||
-d '{"proxy":{"type":"socks5","host":"proxy.example.com","port":"1080"}}'
|
||||
```
|
||||
|
||||
**Precedence:** Key-specific → Combo-specific → Provider-specific → Global → Environment.
|
||||
|
||||
### Model Catalog API
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/api/models/catalog
|
||||
```
|
||||
|
||||
Returns models grouped by provider with types (`chat`, `embedding`, `image`).
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
- Sync providers, combos, and settings across devices
|
||||
- Automatic background sync with timeout + fail-fast
|
||||
- Prefer server-side `BASE_URL`/`CLOUD_URL` in production
|
||||
|
||||
### LLM Gateway Intelligence (Phase 9)
|
||||
|
||||
- **Semantic Cache** — Auto-caches non-streaming, temperature=0 responses (bypass with `X-OmniRoute-No-Cache: true`)
|
||||
- **Request Idempotency** — Deduplicates requests within 5s via `Idempotency-Key` or `X-Request-Id` header
|
||||
- **Progress Tracking** — Opt-in SSE `event: progress` events via `X-OmniRoute-Progress: true` header
|
||||
|
||||
---
|
||||
|
||||
### Translator Playground
|
||||
|
||||
Access via **Dashboard → Translator**. Debug and visualize how OmniRoute translates API requests between providers.
|
||||
|
||||
| Mode | Purpose |
|
||||
| ---------------- | -------------------------------------------------------------------------------------- |
|
||||
| **Playground** | Select source/target formats, paste a request, and see the translated output instantly |
|
||||
| **Chat Tester** | Send live chat messages through the proxy and inspect the full request/response cycle |
|
||||
| **Test Bench** | Run batch tests across multiple format combinations to verify translation correctness |
|
||||
| **Live Monitor** | Watch real-time translations as requests flow through the proxy |
|
||||
|
||||
**Use cases:**
|
||||
|
||||
- Debug why a specific client/provider combination fails
|
||||
- Verify that thinking tags, tool calls, and system prompts translate correctly
|
||||
- Compare format differences between OpenAI, Claude, Gemini, and Responses API formats
|
||||
|
||||
---
|
||||
|
||||
### Routing Strategies
|
||||
|
||||
Configure via **Dashboard → Settings → Routing**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------------------ | ------------------------------------------------------------------------------------------------ |
|
||||
| **Fill First** | Uses accounts in priority order — primary account handles all requests until unavailable |
|
||||
| **Round Robin** | Cycles through all accounts with a configurable sticky limit (default: 3 calls per account) |
|
||||
| **P2C (Power of Two Choices)** | Picks 2 random accounts and routes to the healthier one — balances load with awareness of health |
|
||||
| **Random** | Randomly selects an account for each request using Fisher-Yates shuffle |
|
||||
| **Least Used** | Routes to the account with the oldest `lastUsedAt` timestamp, distributing traffic evenly |
|
||||
| **Cost Optimized** | Routes to the account with the lowest priority value, optimizing for lowest-cost providers |
|
||||
|
||||
#### Wildcard Model Aliases
|
||||
|
||||
Create wildcard patterns to remap model names:
|
||||
|
||||
```
|
||||
Pattern: claude-sonnet-* → Target: cc/claude-sonnet-4-5-20250929
|
||||
Pattern: gpt-* → Target: gh/gpt-5.1-codex
|
||||
```
|
||||
|
||||
Wildcards support `*` (any characters) and `?` (single character).
|
||||
|
||||
#### Fallback Chains
|
||||
|
||||
Define global fallback chains that apply across all requests:
|
||||
|
||||
```
|
||||
Chain: production-fallback
|
||||
1. cc/claude-opus-4-6
|
||||
2. gh/gpt-5.1-codex
|
||||
3. glm/glm-4.7
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Resilience & Circuit Breakers
|
||||
|
||||
Configure via **Dashboard → Settings → Resilience**.
|
||||
|
||||
OmniRoute implements provider-level resilience with four components:
|
||||
|
||||
1. **Provider Profiles** — Per-provider configuration for:
|
||||
- Failure threshold (how many failures before opening)
|
||||
- Cooldown duration
|
||||
- Rate limit detection sensitivity
|
||||
- Exponential backoff parameters
|
||||
|
||||
2. **Editable Rate Limits** — System-level defaults configurable in the dashboard:
|
||||
- **Requests Per Minute (RPM)** — Maximum requests per minute per account
|
||||
- **Min Time Between Requests** — Minimum gap in milliseconds between requests
|
||||
- **Max Concurrent Requests** — Maximum simultaneous requests per account
|
||||
- Click **Edit** to modify, then **Save** or **Cancel**. Values persist via the resilience API.
|
||||
|
||||
3. **Circuit Breaker** — Tracks failures per provider and automatically opens the circuit when a threshold is reached:
|
||||
- **CLOSED** (Healthy) — Requests flow normally
|
||||
- **OPEN** — Provider is temporarily blocked after repeated failures
|
||||
- **HALF_OPEN** — Testing if provider has recovered
|
||||
|
||||
4. **Policies & Locked Identifiers** — Shows circuit breaker status and locked identifiers with force-unlock capability.
|
||||
|
||||
5. **Rate Limit Auto-Detection** — Monitors `429` and `Retry-After` headers to proactively avoid hitting provider rate limits.
|
||||
|
||||
**Pro Tip:** Use **Reset All** button to clear all circuit breakers and cooldowns when a provider recovers from an outage.
|
||||
|
||||
---
|
||||
|
||||
### Database Export / Import
|
||||
|
||||
Manage database backups in **Dashboard → Settings → System & Storage**.
|
||||
|
||||
| Action | Description |
|
||||
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| **Export Database** | Downloads the current SQLite database as a `.sqlite` file |
|
||||
| **Export All (.tar.gz)** | Downloads a full backup archive including: database, settings, combos, provider connections (no credentials), API key metadata |
|
||||
| **Import Database** | Upload a `.sqlite` file to replace the current database. A pre-import backup is automatically created |
|
||||
|
||||
```bash
|
||||
# API: Export database
|
||||
curl -o backup.sqlite http://localhost:20128/api/db-backups/export
|
||||
|
||||
# API: Export all (full archive)
|
||||
curl -o backup.tar.gz http://localhost:20128/api/db-backups/exportAll
|
||||
|
||||
# API: Import database
|
||||
curl -X POST http://localhost:20128/api/db-backups/import \
|
||||
-F "file=@backup.sqlite"
|
||||
```
|
||||
|
||||
**Import Validation:** The imported file is validated for integrity (SQLite pragma check), required tables (`provider_connections`, `provider_nodes`, `combos`, `api_keys`), and size (max 100MB).
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Migrate OmniRoute between machines
|
||||
- Create external backups for disaster recovery
|
||||
- Share configurations between team members (export all → share archive)
|
||||
|
||||
---
|
||||
|
||||
### Settings Dashboard
|
||||
|
||||
The settings page is organized into 5 tabs for easy navigation:
|
||||
|
||||
| Tab | Contents |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| **Security** | Login/Password settings, IP Access Control, API auth for `/models`, and Provider Blocking |
|
||||
| **Routing** | Global routing strategy (6 options), wildcard model aliases, fallback chains, combo defaults |
|
||||
| **Resilience** | Provider profiles, editable rate limits, circuit breaker status, policies & locked identifiers |
|
||||
| **AI** | Thinking budget configuration, global system prompt injection, prompt cache stats |
|
||||
| **Advanced** | Global proxy configuration (HTTP/SOCKS5) |
|
||||
|
||||
---
|
||||
|
||||
### Costs & Budget Management
|
||||
|
||||
Access via **Dashboard → Costs**.
|
||||
|
||||
| Tab | Purpose |
|
||||
| ----------- | ---------------------------------------------------------------------------------------- |
|
||||
| **Budget** | Set spending limits per API key with daily/weekly/monthly budgets and real-time tracking |
|
||||
| **Pricing** | View and edit model pricing entries — cost per 1K input/output tokens per provider |
|
||||
|
||||
```bash
|
||||
# API: Set a budget
|
||||
curl -X POST http://localhost:20128/api/usage/budget \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"keyId": "key-123", "limit": 50.00, "period": "monthly"}'
|
||||
|
||||
# API: Get current budget status
|
||||
curl http://localhost:20128/api/usage/budget
|
||||
```
|
||||
|
||||
**Cost Tracking:** Every request logs token usage and calculates cost using the pricing table. View breakdowns in **Dashboard → Usage** by provider, model, and API key.
|
||||
|
||||
---
|
||||
|
||||
### Audio Transcription
|
||||
|
||||
OmniRoute supports audio transcription via the OpenAI-compatible endpoint:
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
|
||||
# Example with curl
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@audio.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
Available providers: **Deepgram** (`deepgram/`), **AssemblyAI** (`assemblyai/`).
|
||||
|
||||
Supported audio formats: `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
### Combo Balancing Strategies
|
||||
|
||||
Configure per-combo balancing in **Dashboard → Combos → Create/Edit → Strategy**.
|
||||
|
||||
| Strategy | Description |
|
||||
| ------------------ | ------------------------------------------------------------------------ |
|
||||
| **Round-Robin** | Rotates through models sequentially |
|
||||
| **Priority** | Always tries the first model; falls back only on error |
|
||||
| **Random** | Picks a random model from the combo for each request |
|
||||
| **Weighted** | Routes proportionally based on assigned weights per model |
|
||||
| **Least-Used** | Routes to the model with the fewest recent requests (uses combo metrics) |
|
||||
| **Cost-Optimized** | Routes to the cheapest available model (uses pricing table) |
|
||||
|
||||
Global combo defaults can be set in **Dashboard → Settings → Routing → Combo Defaults**.
|
||||
|
||||
---
|
||||
|
||||
### Health Dashboard
|
||||
|
||||
Access via **Dashboard → Health**. Real-time system health overview with 6 cards:
|
||||
|
||||
| Card | What It Shows |
|
||||
| --------------------- | ----------------------------------------------------------- |
|
||||
| **System Status** | Uptime, version, memory usage, data directory |
|
||||
| **Provider Health** | Per-provider circuit breaker state (Closed/Open/Half-Open) |
|
||||
| **Rate Limits** | Active rate limit cooldowns per account with remaining time |
|
||||
| **Active Lockouts** | Providers temporarily blocked by the lockout policy |
|
||||
| **Signature Cache** | Deduplication cache stats (active keys, hit rate) |
|
||||
| **Latency Telemetry** | p50/p95/p99 latency aggregation per provider |
|
||||
|
||||
**Pro Tip:** The Health page auto-refreshes every 10 seconds. Use the circuit breaker card to identify which providers are experiencing issues.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application (Electron)
|
||||
|
||||
OmniRoute is available as a native desktop application for Windows, macOS, and Linux.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# From the electron directory:
|
||||
cd electron
|
||||
npm install
|
||||
|
||||
# Development mode (connect to running Next.js dev server):
|
||||
npm run dev
|
||||
|
||||
# Production mode (uses standalone build):
|
||||
npm start
|
||||
```
|
||||
|
||||
### Building Installers
|
||||
|
||||
```bash
|
||||
cd electron
|
||||
npm run build # Current platform
|
||||
npm run build:win # Windows (.exe NSIS)
|
||||
npm run build:mac # macOS (.dmg universal)
|
||||
npm run build:linux # Linux (.AppImage)
|
||||
```
|
||||
|
||||
Output → `electron/dist-electron/`
|
||||
|
||||
### Key Features
|
||||
|
||||
| Feature | Description |
|
||||
| --------------------------- | ---------------------------------------------------- |
|
||||
| **Server Readiness** | Polls server before showing window (no blank screen) |
|
||||
| **System Tray** | Minimize to tray, change port, quit from tray menu |
|
||||
| **Port Management** | Change server port from tray (auto-restarts server) |
|
||||
| **Content Security Policy** | Restrictive CSP via session headers |
|
||||
| **Single Instance** | Only one app instance can run at a time |
|
||||
| **Offline Mode** | Bundled Next.js server works without internet |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| --------------------- | ------- | -------------------------------- |
|
||||
| `OMNIROUTE_PORT` | `20128` | Server port |
|
||||
| `OMNIROUTE_MEMORY_MB` | `512` | Node.js heap limit (64–16384 MB) |
|
||||
|
||||
📖 Full documentation: [`electron/README.md`](../electron/README.md)
|
||||
@@ -0,0 +1,403 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/VM_DEPLOYMENT_GUIDE.md) · 🇪🇸 [es](../es/VM_DEPLOYMENT_GUIDE.md) · 🇫🇷 [fr](../fr/VM_DEPLOYMENT_GUIDE.md) · 🇩🇪 [de](../de/VM_DEPLOYMENT_GUIDE.md) · 🇮🇹 [it](../it/VM_DEPLOYMENT_GUIDE.md) · 🇷🇺 [ru](../ru/VM_DEPLOYMENT_GUIDE.md) · 🇨🇳 [zh-CN](../zh-CN/VM_DEPLOYMENT_GUIDE.md) · 🇯🇵 [ja](../ja/VM_DEPLOYMENT_GUIDE.md) · 🇰🇷 [ko](../ko/VM_DEPLOYMENT_GUIDE.md) · 🇸🇦 [ar](../ar/VM_DEPLOYMENT_GUIDE.md) · 🇮🇳 [in](../in/VM_DEPLOYMENT_GUIDE.md) · 🇹🇭 [th](../th/VM_DEPLOYMENT_GUIDE.md) · 🇻🇳 [vi](../vi/VM_DEPLOYMENT_GUIDE.md) · 🇮🇩 [id](../id/VM_DEPLOYMENT_GUIDE.md) · 🇲🇾 [ms](../ms/VM_DEPLOYMENT_GUIDE.md) · 🇳🇱 [nl](../nl/VM_DEPLOYMENT_GUIDE.md) · 🇵🇱 [pl](../pl/VM_DEPLOYMENT_GUIDE.md) · 🇸🇪 [sv](../sv/VM_DEPLOYMENT_GUIDE.md) · 🇳🇴 [no](../no/VM_DEPLOYMENT_GUIDE.md) · 🇩🇰 [da](../da/VM_DEPLOYMENT_GUIDE.md) · 🇫🇮 [fi](../fi/VM_DEPLOYMENT_GUIDE.md) · 🇵🇹 [pt](../pt/VM_DEPLOYMENT_GUIDE.md) · 🇷🇴 [ro](../ro/VM_DEPLOYMENT_GUIDE.md) · 🇭🇺 [hu](../hu/VM_DEPLOYMENT_GUIDE.md) · 🇧🇬 [bg](../bg/VM_DEPLOYMENT_GUIDE.md) · 🇸🇰 [sk](../sk/VM_DEPLOYMENT_GUIDE.md) · 🇺🇦 [uk-UA](../uk-UA/VM_DEPLOYMENT_GUIDE.md) · 🇮🇱 [he](../he/VM_DEPLOYMENT_GUIDE.md) · 🇵🇭 [phi](../phi/VM_DEPLOYMENT_GUIDE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Guia de Deploy em VM com Cloudflare
|
||||
|
||||
Guia completo para instalar e configurar o OmniRoute em uma VM (VPS) com domínio gerenciado via Cloudflare.
|
||||
|
||||
---
|
||||
|
||||
## Pré-Requisitos
|
||||
|
||||
| Item | Mínimo | Recomendado |
|
||||
| ----------- | ------------------------ | ---------------- |
|
||||
| **CPU** | 1 vCPU | 2 vCPU |
|
||||
| **RAM** | 1 GB | 2 GB |
|
||||
| **Disco** | 10 GB SSD | 25 GB SSD |
|
||||
| **SO** | Ubuntu 22.04 LTS | Ubuntu 24.04 LTS |
|
||||
| **Domínio** | Registrado no Cloudflare | — |
|
||||
| **Docker** | Docker Engine 24+ | Docker 27+ |
|
||||
|
||||
**Providers testados**: Akamai (Linode), DigitalOcean, Vultr, Hetzner, AWS Lightsail.
|
||||
|
||||
---
|
||||
|
||||
## 1. Configurar a VM
|
||||
|
||||
### 1.1 Criar a instância
|
||||
|
||||
No seu provider de VPS preferido:
|
||||
|
||||
- Escolha Ubuntu 24.04 LTS
|
||||
- Selecione o plano mínimo (1 vCPU / 1 GB RAM)
|
||||
- Defina uma senha forte para root ou configure SSH key
|
||||
- Anote o **IP público** (ex: `203.0.113.10`)
|
||||
|
||||
### 1.2 Conectar via SSH
|
||||
|
||||
```bash
|
||||
ssh root@203.0.113.10
|
||||
```
|
||||
|
||||
### 1.3 Atualizar o sistema
|
||||
|
||||
```bash
|
||||
apt update && apt upgrade -y
|
||||
```
|
||||
|
||||
### 1.4 Instalar Docker
|
||||
|
||||
```bash
|
||||
# Instalar dependências
|
||||
apt install -y ca-certificates curl gnupg
|
||||
|
||||
# Adicionar repositório oficial do Docker
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
chmod a+r /etc/apt/keyrings/docker.gpg
|
||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
apt update
|
||||
apt install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
|
||||
```
|
||||
|
||||
### 1.5 Instalar nginx
|
||||
|
||||
```bash
|
||||
apt install -y nginx
|
||||
```
|
||||
|
||||
### 1.6 Configurar Firewall (UFW)
|
||||
|
||||
```bash
|
||||
ufw default deny incoming
|
||||
ufw default allow outgoing
|
||||
ufw allow 22/tcp # SSH
|
||||
ufw allow 80/tcp # HTTP (redirect)
|
||||
ufw allow 443/tcp # HTTPS
|
||||
ufw enable
|
||||
```
|
||||
|
||||
> **Dica**: Para segurança máxima, restrinja as portas 80 e 443 apenas para IPs da Cloudflare. Veja a seção [Segurança Avançada](#segurança-avançada).
|
||||
|
||||
---
|
||||
|
||||
## 2. Instalar o OmniRoute
|
||||
|
||||
### 2.1 Criar diretório de configuração
|
||||
|
||||
```bash
|
||||
mkdir -p /opt/omniroute
|
||||
```
|
||||
|
||||
### 2.2 Criar arquivo de variáveis de ambiente
|
||||
|
||||
```bash
|
||||
cat > /opt/omniroute/.env << 'EOF'
|
||||
# === Segurança ===
|
||||
JWT_SECRET=ALTERE-PARA-CHAVE-SECRETA-UNICA-64-CHARS
|
||||
INITIAL_PASSWORD=SuaSenhaSegura123!
|
||||
API_KEY_SECRET=ALTERE-PARA-OUTRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY=ALTERE-PARA-TERCEIRA-CHAVE-SECRETA
|
||||
STORAGE_ENCRYPTION_KEY_VERSION=v1
|
||||
MACHINE_ID_SALT=ALTERE-PARA-SALT-UNICO
|
||||
|
||||
# === App ===
|
||||
PORT=20128
|
||||
NODE_ENV=production
|
||||
HOSTNAME=0.0.0.0
|
||||
DATA_DIR=/app/data
|
||||
STORAGE_DRIVER=sqlite
|
||||
ENABLE_REQUEST_LOGS=true
|
||||
AUTH_COOKIE_SECURE=false
|
||||
REQUIRE_API_KEY=false
|
||||
|
||||
# === Domain (altere para seu domínio) ===
|
||||
BASE_URL=https://llms.seudominio.com
|
||||
NEXT_PUBLIC_BASE_URL=https://llms.seudominio.com
|
||||
|
||||
# === Cloud Sync (opcional) ===
|
||||
# CLOUD_URL=https://cloud.omniroute.online
|
||||
# NEXT_PUBLIC_CLOUD_URL=https://cloud.omniroute.online
|
||||
EOF
|
||||
```
|
||||
|
||||
> ⚠️ **IMPORTANTE**: Gere chaves secretas únicas! Use `openssl rand -hex 32` para cada chave.
|
||||
|
||||
### 2.3 Iniciar o container
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
|
||||
docker run -d \
|
||||
--name omniroute \
|
||||
--restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### 2.4 Verificar se está rodando
|
||||
|
||||
```bash
|
||||
docker ps | grep omniroute
|
||||
docker logs omniroute --tail 20
|
||||
```
|
||||
|
||||
Deve exibir: `[DB] SQLite database ready` e `listening on port 20128`.
|
||||
|
||||
---
|
||||
|
||||
## 3. Configurar nginx (Reverse Proxy)
|
||||
|
||||
### 3.1 Gerar certificado SSL (Cloudflare Origin)
|
||||
|
||||
No painel da Cloudflare:
|
||||
|
||||
1. Vá em **SSL/TLS → Origin Server**
|
||||
2. Clique **Create Certificate**
|
||||
3. Deixe os padrões (15 anos, \*.seudominio.com)
|
||||
4. Copie o **Origin Certificate** e a **Private Key**
|
||||
|
||||
```bash
|
||||
mkdir -p /etc/nginx/ssl
|
||||
|
||||
# Colar o certificado
|
||||
nano /etc/nginx/ssl/origin.crt
|
||||
|
||||
# Colar a chave privada
|
||||
nano /etc/nginx/ssl/origin.key
|
||||
|
||||
chmod 600 /etc/nginx/ssl/origin.key
|
||||
```
|
||||
|
||||
### 3.2 Configuração do nginx
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/sites-available/omniroute << 'NGINX'
|
||||
# Default server — bloqueia acesso direto por IP
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen [::]:80 default_server;
|
||||
listen 443 ssl default_server;
|
||||
listen [::]:443 ssl default_server;
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
server_name _;
|
||||
return 444;
|
||||
}
|
||||
|
||||
# OmniRoute — HTTPS
|
||||
server {
|
||||
listen 443 ssl;
|
||||
listen [::]:443 ssl;
|
||||
server_name llms.seudominio.com; # Altere para seu domínio
|
||||
|
||||
ssl_certificate /etc/nginx/ssl/origin.crt;
|
||||
ssl_certificate_key /etc/nginx/ssl/origin.key;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
|
||||
client_max_body_size 100M;
|
||||
|
||||
location / {
|
||||
proxy_pass http://127.0.0.1:20128;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
# SSE (Server-Sent Events) — streaming AI responses
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_read_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
}
|
||||
}
|
||||
|
||||
# HTTP → HTTPS redirect
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
server_name llms.seudominio.com;
|
||||
return 301 https://$server_name$request_uri;
|
||||
}
|
||||
NGINX
|
||||
```
|
||||
|
||||
### 3.3 Ativar e testar
|
||||
|
||||
```bash
|
||||
# Remover config padrão
|
||||
rm -f /etc/nginx/sites-enabled/default
|
||||
|
||||
# Ativar OmniRoute
|
||||
ln -sf /etc/nginx/sites-available/omniroute /etc/nginx/sites-enabled/omniroute
|
||||
|
||||
# Testar e recarregar
|
||||
nginx -t && systemctl reload nginx
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Configurar Cloudflare DNS
|
||||
|
||||
### 4.1 Adicionar registro DNS
|
||||
|
||||
No painel da Cloudflare → DNS:
|
||||
|
||||
| Type | Name | Content | Proxy |
|
||||
| ---- | ------ | ------------------------- | ---------- |
|
||||
| A | `llms` | `203.0.113.10` (IP da VM) | ✅ Proxied |
|
||||
|
||||
### 4.2 Configurar SSL
|
||||
|
||||
Em **SSL/TLS → Overview**:
|
||||
|
||||
- Modo: **Full (Strict)**
|
||||
|
||||
Em **SSL/TLS → Edge Certificates**:
|
||||
|
||||
- Always Use HTTPS: ✅ On
|
||||
- Minimum TLS Version: TLS 1.2
|
||||
- Automatic HTTPS Rewrites: ✅ On
|
||||
|
||||
### 4.3 Testar
|
||||
|
||||
```bash
|
||||
curl -sI https://llms.seudominio.com/health
|
||||
# Deve retornar HTTP/2 200
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Operações e Manutenção
|
||||
|
||||
### Atualizar para nova versão
|
||||
|
||||
```bash
|
||||
docker pull diegosouzapw/omniroute:latest
|
||||
docker stop omniroute && docker rm omniroute
|
||||
docker run -d --name omniroute --restart unless-stopped \
|
||||
--env-file /opt/omniroute/.env \
|
||||
-p 20128:20128 \
|
||||
-v omniroute-data:/app/data \
|
||||
diegosouzapw/omniroute:latest
|
||||
```
|
||||
|
||||
### Ver logs
|
||||
|
||||
```bash
|
||||
docker logs -f omniroute # Stream em tempo real
|
||||
docker logs omniroute --tail 50 # Últimas 50 linhas
|
||||
```
|
||||
|
||||
### Backup manual do banco
|
||||
|
||||
```bash
|
||||
# Copiar dados do volume para o host
|
||||
docker cp omniroute:/app/data ./backup-$(date +%F)
|
||||
|
||||
# Ou comprimir todo o volume
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine tar czf /backup/omniroute-data-$(date +%F).tar.gz /data
|
||||
```
|
||||
|
||||
### Restaurar de backup
|
||||
|
||||
```bash
|
||||
docker stop omniroute
|
||||
docker run --rm -v omniroute-data:/data -v $(pwd):/backup \
|
||||
alpine sh -c "rm -rf /data/* && tar xzf /backup/omniroute-data-YYYY-MM-DD.tar.gz -C /"
|
||||
docker start omniroute
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Segurança Avançada
|
||||
|
||||
### Restringir nginx para Cloudflare IPs
|
||||
|
||||
```bash
|
||||
cat > /etc/nginx/cloudflare-ips.conf << 'CF'
|
||||
# Cloudflare IPv4 ranges — atualizar periodicamente
|
||||
# https://www.cloudflare.com/ips-v4/
|
||||
set_real_ip_from 173.245.48.0/20;
|
||||
set_real_ip_from 103.21.244.0/22;
|
||||
set_real_ip_from 103.22.200.0/22;
|
||||
set_real_ip_from 103.31.4.0/22;
|
||||
set_real_ip_from 141.101.64.0/18;
|
||||
set_real_ip_from 108.162.192.0/18;
|
||||
set_real_ip_from 190.93.240.0/20;
|
||||
set_real_ip_from 188.114.96.0/20;
|
||||
set_real_ip_from 197.234.240.0/22;
|
||||
set_real_ip_from 198.41.128.0/17;
|
||||
set_real_ip_from 162.158.0.0/15;
|
||||
set_real_ip_from 104.16.0.0/13;
|
||||
set_real_ip_from 104.24.0.0/14;
|
||||
set_real_ip_from 172.64.0.0/13;
|
||||
set_real_ip_from 131.0.72.0/22;
|
||||
real_ip_header CF-Connecting-IP;
|
||||
CF
|
||||
```
|
||||
|
||||
Adicionar no `nginx.conf` dentro do bloco `http {}`:
|
||||
|
||||
```nginx
|
||||
include /etc/nginx/cloudflare-ips.conf;
|
||||
```
|
||||
|
||||
### Install fail2ban
|
||||
|
||||
```bash
|
||||
apt install -y fail2ban
|
||||
systemctl enable fail2ban
|
||||
systemctl start fail2ban
|
||||
|
||||
# Verificar status
|
||||
fail2ban-client status sshd
|
||||
```
|
||||
|
||||
### Bloquear acesso direto na porta do Docker
|
||||
|
||||
```bash
|
||||
# Impedir acesso externo direto à porta 20128
|
||||
iptables -I DOCKER-USER -p tcp --dport 20128 -j DROP
|
||||
iptables -I DOCKER-USER -i lo -p tcp --dport 20128 -j ACCEPT
|
||||
|
||||
# Persistir as regras
|
||||
apt install -y iptables-persistent
|
||||
netfilter-persistent save
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Deploy do Cloud Worker (Opcional)
|
||||
|
||||
Para acesso remoto via Cloudflare Workers (sem expor a VM diretamente):
|
||||
|
||||
```bash
|
||||
# No repositório local
|
||||
cd omnirouteCloud
|
||||
npm install
|
||||
npx wrangler login
|
||||
npx wrangler deploy
|
||||
```
|
||||
|
||||
Ver documentação completa em [omnirouteCloud/README.md](../omnirouteCloud/README.md).
|
||||
|
||||
---
|
||||
|
||||
## Resumo de Portas
|
||||
|
||||
| Porta | Serviço | Acesso |
|
||||
| ----- | ----------- | ----------------------------- |
|
||||
| 22 | SSH | Público (com fail2ban) |
|
||||
| 80 | nginx HTTP | Redirect → HTTPS |
|
||||
| 443 | nginx HTTPS | Via Cloudflare Proxy |
|
||||
| 20128 | OmniRoute | Somente localhost (via nginx) |
|
||||
@@ -0,0 +1,200 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/A2A-SERVER.md) · 🇪🇸 [es](../es/A2A-SERVER.md) · 🇫🇷 [fr](../fr/A2A-SERVER.md) · 🇩🇪 [de](../de/A2A-SERVER.md) · 🇮🇹 [it](../it/A2A-SERVER.md) · 🇷🇺 [ru](../ru/A2A-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/A2A-SERVER.md) · 🇯🇵 [ja](../ja/A2A-SERVER.md) · 🇰🇷 [ko](../ko/A2A-SERVER.md) · 🇸🇦 [ar](../ar/A2A-SERVER.md) · 🇮🇳 [in](../in/A2A-SERVER.md) · 🇹🇭 [th](../th/A2A-SERVER.md) · 🇻🇳 [vi](../vi/A2A-SERVER.md) · 🇮🇩 [id](../id/A2A-SERVER.md) · 🇲🇾 [ms](../ms/A2A-SERVER.md) · 🇳🇱 [nl](../nl/A2A-SERVER.md) · 🇵🇱 [pl](../pl/A2A-SERVER.md) · 🇸🇪 [sv](../sv/A2A-SERVER.md) · 🇳🇴 [no](../no/A2A-SERVER.md) · 🇩🇰 [da](../da/A2A-SERVER.md) · 🇫🇮 [fi](../fi/A2A-SERVER.md) · 🇵🇹 [pt](../pt/A2A-SERVER.md) · 🇷🇴 [ro](../ro/A2A-SERVER.md) · 🇭🇺 [hu](../hu/A2A-SERVER.md) · 🇧🇬 [bg](../bg/A2A-SERVER.md) · 🇸🇰 [sk](../sk/A2A-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/A2A-SERVER.md) · 🇮🇱 [he](../he/A2A-SERVER.md) · 🇵🇭 [phi](../phi/A2A-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute A2A Server Documentation
|
||||
|
||||
> Agent-to-Agent Protocol v0.3 — OmniRoute as an intelligent routing agent
|
||||
|
||||
## Agent Discovery
|
||||
|
||||
```bash
|
||||
curl http://localhost:20128/.well-known/agent.json
|
||||
```
|
||||
|
||||
Returns the Agent Card describing OmniRoute's capabilities, skills, and authentication requirements.
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
All `/a2a` requests require an API key via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer YOUR_OMNIROUTE_API_KEY
|
||||
```
|
||||
|
||||
If no API key is configured on the server, authentication is bypassed.
|
||||
|
||||
---
|
||||
|
||||
## JSON-RPC 2.0 Methods
|
||||
|
||||
### `message/send` — Synchronous Execution
|
||||
|
||||
Sends a message to a skill and waits for the complete response.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Write a hello world in Python"}],
|
||||
"metadata": {"model": "auto", "combo": "fast-coding"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"result": {
|
||||
"task": { "id": "uuid", "state": "completed" },
|
||||
"artifacts": [{ "type": "text", "content": "..." }],
|
||||
"metadata": {
|
||||
"routing_explanation": "Selected claude-sonnet via provider \"anthropic\" (latency: 1200ms, cost: $0.003)",
|
||||
"cost_envelope": { "estimated": 0.005, "actual": 0.003, "currency": "USD" },
|
||||
"resilience_trace": [
|
||||
{ "event": "primary_selected", "provider": "anthropic", "timestamp": "..." }
|
||||
],
|
||||
"policy_verdict": { "allowed": true, "reason": "within budget and quota limits" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `message/stream` — SSE Streaming
|
||||
|
||||
Same as `message/send` but returns Server-Sent Events for real-time streaming.
|
||||
|
||||
```bash
|
||||
curl -N -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "1",
|
||||
"method": "message/stream",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Explain quantum computing"}]
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
**SSE Events:**
|
||||
|
||||
```
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"working"},"chunk":{"type":"text","content":"..."}}}
|
||||
|
||||
: heartbeat 2026-03-03T17:00:00Z
|
||||
|
||||
data: {"jsonrpc":"2.0","method":"message/stream","params":{"task":{"id":"...","state":"completed"},"metadata":{...}}}
|
||||
```
|
||||
|
||||
### `tasks/get` — Query Task Status
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"tasks/get","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
### `tasks/cancel` — Cancel a Task
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer YOUR_KEY" \
|
||||
-d '{"jsonrpc":"2.0","id":"3","method":"tasks/cancel","params":{"taskId":"TASK_UUID"}}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Skills
|
||||
|
||||
| Skill | Description |
|
||||
| :----------------- | :------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `smart-routing` | Routes prompts through OmniRoute's intelligent pipeline. Returns response with routing explanation, cost, and resilience trace. |
|
||||
| `quota-management` | Answers natural-language queries about provider quotas, suggests free combos, and provides quota rankings. |
|
||||
|
||||
---
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
```
|
||||
submitted → working → completed
|
||||
→ failed
|
||||
→ cancelled
|
||||
```
|
||||
|
||||
- Tasks expire after 5 minutes (configurable)
|
||||
- Terminal states: `completed`, `failed`, `cancelled`
|
||||
- Event log tracks every state transition
|
||||
|
||||
---
|
||||
|
||||
## Error Codes
|
||||
|
||||
| Code | Meaning |
|
||||
| :----- | :----------------------------- |
|
||||
| -32700 | Parse error (invalid JSON) |
|
||||
| -32600 | Invalid request / Unauthorized |
|
||||
| -32601 | Method or skill not found |
|
||||
| -32602 | Invalid params |
|
||||
| -32603 | Internal error |
|
||||
|
||||
---
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### Python (requests)
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
resp = requests.post("http://localhost:20128/a2a", json={
|
||||
"jsonrpc": "2.0", "id": "1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"skill": "smart-routing",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}
|
||||
}, headers={"Authorization": "Bearer YOUR_KEY"})
|
||||
|
||||
result = resp.json()["result"]
|
||||
print(result["artifacts"][0]["content"])
|
||||
print(result["metadata"]["routing_explanation"])
|
||||
```
|
||||
|
||||
### TypeScript (fetch)
|
||||
|
||||
```typescript
|
||||
const resp = await fetch("http://localhost:20128/a2a", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: "Bearer YOUR_KEY",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: "1",
|
||||
method: "message/send",
|
||||
params: {
|
||||
skill: "smart-routing",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { result } = await resp.json();
|
||||
console.log(result.metadata.routing_explanation);
|
||||
```
|
||||
@@ -0,0 +1,455 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/API_REFERENCE.md) · 🇪🇸 [es](../es/API_REFERENCE.md) · 🇫🇷 [fr](../fr/API_REFERENCE.md) · 🇩🇪 [de](../de/API_REFERENCE.md) · 🇮🇹 [it](../it/API_REFERENCE.md) · 🇷🇺 [ru](../ru/API_REFERENCE.md) · 🇨🇳 [zh-CN](../zh-CN/API_REFERENCE.md) · 🇯🇵 [ja](../ja/API_REFERENCE.md) · 🇰🇷 [ko](../ko/API_REFERENCE.md) · 🇸🇦 [ar](../ar/API_REFERENCE.md) · 🇮🇳 [in](../in/API_REFERENCE.md) · 🇹🇭 [th](../th/API_REFERENCE.md) · 🇻🇳 [vi](../vi/API_REFERENCE.md) · 🇮🇩 [id](../id/API_REFERENCE.md) · 🇲🇾 [ms](../ms/API_REFERENCE.md) · 🇳🇱 [nl](../nl/API_REFERENCE.md) · 🇵🇱 [pl](../pl/API_REFERENCE.md) · 🇸🇪 [sv](../sv/API_REFERENCE.md) · 🇳🇴 [no](../no/API_REFERENCE.md) · 🇩🇰 [da](../da/API_REFERENCE.md) · 🇫🇮 [fi](../fi/API_REFERENCE.md) · 🇵🇹 [pt](../pt/API_REFERENCE.md) · 🇷🇴 [ro](../ro/API_REFERENCE.md) · 🇭🇺 [hu](../hu/API_REFERENCE.md) · 🇧🇬 [bg](../bg/API_REFERENCE.md) · 🇸🇰 [sk](../sk/API_REFERENCE.md) · 🇺🇦 [uk-UA](../uk-UA/API_REFERENCE.md) · 🇮🇱 [he](../he/API_REFERENCE.md) · 🇵🇭 [phi](../phi/API_REFERENCE.md)
|
||||
|
||||
---
|
||||
|
||||
# API Reference
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](API_REFERENCE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/API_REFERENCE.md) | 🇪🇸 [Español](i18n/es/API_REFERENCE.md) | 🇫🇷 [Français](i18n/fr/API_REFERENCE.md) | 🇮🇹 [Italiano](i18n/it/API_REFERENCE.md) | 🇷🇺 [Русский](i18n/ru/API_REFERENCE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/API_REFERENCE.md) | 🇩🇪 [Deutsch](i18n/de/API_REFERENCE.md) | 🇮🇳 [हिन्दी](i18n/in/API_REFERENCE.md) | 🇹🇭 [ไทย](i18n/th/API_REFERENCE.md) | 🇺🇦 [Українська](i18n/uk-UA/API_REFERENCE.md) | 🇸🇦 [العربية](i18n/ar/API_REFERENCE.md) | 🇯🇵 [日本語](i18n/ja/API_REFERENCE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/API_REFERENCE.md) | 🇧🇬 [Български](i18n/bg/API_REFERENCE.md) | 🇩🇰 [Dansk](i18n/da/API_REFERENCE.md) | 🇫🇮 [Suomi](i18n/fi/API_REFERENCE.md) | 🇮🇱 [עברית](i18n/he/API_REFERENCE.md) | 🇭🇺 [Magyar](i18n/hu/API_REFERENCE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/API_REFERENCE.md) | 🇰🇷 [한국어](i18n/ko/API_REFERENCE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/API_REFERENCE.md) | 🇳🇱 [Nederlands](i18n/nl/API_REFERENCE.md) | 🇳🇴 [Norsk](i18n/no/API_REFERENCE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/API_REFERENCE.md) | 🇷🇴 [Română](i18n/ro/API_REFERENCE.md) | 🇵🇱 [Polski](i18n/pl/API_REFERENCE.md) | 🇸🇰 [Slovenčina](i18n/sk/API_REFERENCE.md) | 🇸🇪 [Svenska](i18n/sv/API_REFERENCE.md) | 🇵🇭 [Filipino](i18n/phi/API_REFERENCE.md)
|
||||
|
||||
Complete reference for all OmniRoute API endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Chat Completions](#chat-completions)
|
||||
- [Embeddings](#embeddings)
|
||||
- [Image Generation](#image-generation)
|
||||
- [List Models](#list-models)
|
||||
- [Compatibility Endpoints](#compatibility-endpoints)
|
||||
- [Semantic Cache](#semantic-cache)
|
||||
- [Dashboard & Management](#dashboard--management)
|
||||
- [Request Processing](#request-processing)
|
||||
- [Authentication](#authentication)
|
||||
|
||||
---
|
||||
|
||||
## Chat Completions
|
||||
|
||||
```bash
|
||||
POST /v1/chat/completions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "cc/claude-opus-4-6",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Write a function to..."}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
```
|
||||
|
||||
### Custom Headers
|
||||
|
||||
| Header | Direction | Description |
|
||||
| ------------------------ | --------- | --------------------------------- |
|
||||
| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache |
|
||||
| `X-OmniRoute-Progress` | Request | Set to `true` for progress events |
|
||||
| `Idempotency-Key` | Request | Dedup key (5s window) |
|
||||
| `X-Request-Id` | Request | Alternative dedup key |
|
||||
| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) |
|
||||
| `X-OmniRoute-Idempotent` | Response | `true` if deduplicated |
|
||||
| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on |
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
```bash
|
||||
POST /v1/embeddings
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "nebius/Qwen/Qwen3-Embedding-8B",
|
||||
"input": "The food was delicious"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: Nebius, OpenAI, Mistral, Together AI, Fireworks, NVIDIA.
|
||||
|
||||
```bash
|
||||
# List all embedding models
|
||||
GET /v1/embeddings
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Image Generation
|
||||
|
||||
```bash
|
||||
POST /v1/images/generations
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "openai/dall-e-3",
|
||||
"prompt": "A beautiful sunset over mountains",
|
||||
"size": "1024x1024"
|
||||
}
|
||||
```
|
||||
|
||||
Available providers: OpenAI (DALL-E), xAI (Grok Image), Together AI (FLUX), Fireworks AI.
|
||||
|
||||
```bash
|
||||
# List all image models
|
||||
GET /v1/images/generations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## List Models
|
||||
|
||||
```bash
|
||||
GET /v1/models
|
||||
Authorization: Bearer your-api-key
|
||||
|
||||
→ Returns all chat, embedding, and image models + combos in OpenAI format
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Compatibility Endpoints
|
||||
|
||||
| Method | Path | Format |
|
||||
| ------ | --------------------------- | ---------------------- |
|
||||
| POST | `/v1/chat/completions` | OpenAI |
|
||||
| POST | `/v1/messages` | Anthropic |
|
||||
| POST | `/v1/responses` | OpenAI Responses |
|
||||
| POST | `/v1/embeddings` | OpenAI |
|
||||
| POST | `/v1/images/generations` | OpenAI |
|
||||
| GET | `/v1/models` | OpenAI |
|
||||
| POST | `/v1/messages/count_tokens` | Anthropic |
|
||||
| GET | `/v1beta/models` | Gemini |
|
||||
| POST | `/v1beta/models/{...path}` | Gemini generateContent |
|
||||
| POST | `/v1/api/chat` | Ollama |
|
||||
|
||||
### Dedicated Provider Routes
|
||||
|
||||
```bash
|
||||
POST /v1/providers/{provider}/chat/completions
|
||||
POST /v1/providers/{provider}/embeddings
|
||||
POST /v1/providers/{provider}/images/generations
|
||||
```
|
||||
|
||||
The provider prefix is auto-added if missing. Mismatched models return `400`.
|
||||
|
||||
---
|
||||
|
||||
## Semantic Cache
|
||||
|
||||
```bash
|
||||
# Get cache stats
|
||||
GET /api/cache
|
||||
|
||||
# Clear all caches
|
||||
DELETE /api/cache
|
||||
```
|
||||
|
||||
Response example:
|
||||
|
||||
```json
|
||||
{
|
||||
"semanticCache": {
|
||||
"memorySize": 42,
|
||||
"memoryMaxSize": 500,
|
||||
"dbSize": 128,
|
||||
"hitRate": 0.65
|
||||
},
|
||||
"idempotency": {
|
||||
"activeKeys": 3,
|
||||
"windowMs": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dashboard & Management
|
||||
|
||||
### Authentication
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------------- | ------- | --------------------- |
|
||||
| `/api/auth/login` | POST | Login |
|
||||
| `/api/auth/logout` | POST | Logout |
|
||||
| `/api/settings/require-login` | GET/PUT | Toggle login required |
|
||||
|
||||
### Provider Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------- | --------------- | ------------------------ |
|
||||
| `/api/providers` | GET/POST | List / create providers |
|
||||
| `/api/providers/[id]` | GET/PUT/DELETE | Manage a provider |
|
||||
| `/api/providers/[id]/test` | POST | Test provider connection |
|
||||
| `/api/providers/[id]/models` | GET | List provider models |
|
||||
| `/api/providers/validate` | POST | Validate provider config |
|
||||
| `/api/provider-nodes*` | Various | Provider node management |
|
||||
| `/api/provider-models` | GET/POST/DELETE | Custom models |
|
||||
|
||||
### OAuth Flows
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------------- | ------- | ----------------------- |
|
||||
| `/api/oauth/[provider]/[action]` | Various | Provider-specific OAuth |
|
||||
|
||||
### Routing & Config
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------- | -------- | ----------------------------- |
|
||||
| `/api/models/alias` | GET/POST | Model aliases |
|
||||
| `/api/models/catalog` | GET | All models by provider + type |
|
||||
| `/api/combos*` | Various | Combo management |
|
||||
| `/api/keys*` | Various | API key management |
|
||||
| `/api/pricing` | GET | Model pricing |
|
||||
|
||||
### Usage & Analytics
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | -------------------- |
|
||||
| `/api/usage/history` | GET | Usage history |
|
||||
| `/api/usage/logs` | GET | Usage logs |
|
||||
| `/api/usage/request-logs` | GET | Request-level logs |
|
||||
| `/api/usage/[connectionId]` | GET | Per-connection usage |
|
||||
|
||||
### Settings
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------------- | ------- | ---------------------- |
|
||||
| `/api/settings` | GET/PUT | General settings |
|
||||
| `/api/settings/proxy` | GET/PUT | Network proxy config |
|
||||
| `/api/settings/proxy/test` | POST | Test proxy connection |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt |
|
||||
|
||||
### Monitoring
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------------------ | ---------- | ----------------------- |
|
||||
| `/api/sessions` | GET | Active session tracking |
|
||||
| `/api/rate-limits` | GET | Per-account rate limits |
|
||||
| `/api/monitoring/health` | GET | Health check |
|
||||
| `/api/cache` | GET/DELETE | Cache stats / clear |
|
||||
|
||||
### Backup & Export/Import
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | --------------------------------------- |
|
||||
| `/api/db-backups` | GET | List available backups |
|
||||
| `/api/db-backups` | PUT | Create a manual backup |
|
||||
| `/api/db-backups` | POST | Restore from a specific backup |
|
||||
| `/api/db-backups/export` | GET | Download database as .sqlite file |
|
||||
| `/api/db-backups/import` | POST | Upload .sqlite file to replace database |
|
||||
| `/api/db-backups/exportAll` | GET | Download full backup as .tar.gz archive |
|
||||
|
||||
### Cloud Sync
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------- | ------- | --------------------- |
|
||||
| `/api/sync/cloud` | Various | Cloud sync operations |
|
||||
| `/api/sync/initialize` | POST | Initialize sync |
|
||||
| `/api/cloud/*` | Various | Cloud management |
|
||||
|
||||
### CLI Tools
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ---------------------------------- | ------ | ------------------- |
|
||||
| `/api/cli-tools/claude-settings` | GET | Claude CLI status |
|
||||
| `/api/cli-tools/codex-settings` | GET | Codex CLI status |
|
||||
| `/api/cli-tools/droid-settings` | GET | Droid CLI status |
|
||||
| `/api/cli-tools/openclaw-settings` | GET | OpenClaw CLI status |
|
||||
| `/api/cli-tools/runtime/[toolId]` | GET | Generic CLI runtime |
|
||||
|
||||
CLI responses include: `installed`, `runnable`, `command`, `commandPath`, `runtimeMode`, `reason`.
|
||||
|
||||
### ACP Agents
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------- | ------ | -------------------------------------------------------- |
|
||||
| `/api/acp/agents` | GET | List all detected agents (built-in + custom) with status |
|
||||
| `/api/acp/agents` | POST | Add custom agent or refresh detection cache |
|
||||
| `/api/acp/agents` | DELETE | Remove a custom agent by `id` query param |
|
||||
|
||||
GET response includes `agents[]` (id, name, binary, version, installed, protocol, isCustom) and `summary` (total, installed, notFound, builtIn, custom).
|
||||
|
||||
### Resilience & Rate Limits
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ----------------------- | ------- | ------------------------------- |
|
||||
| `/api/resilience` | GET/PUT | Get/update resilience profiles |
|
||||
| `/api/resilience/reset` | POST | Reset circuit breakers |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
| `/api/rate-limit` | GET | Global rate limit configuration |
|
||||
|
||||
### Evals
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| ------------ | -------- | --------------------------------- |
|
||||
| `/api/evals` | GET/POST | List eval suites / run evaluation |
|
||||
|
||||
### Policies
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | --------------- | ----------------------- |
|
||||
| `/api/policies` | GET/POST/DELETE | Manage routing policies |
|
||||
|
||||
### Compliance
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------------------- | ------ | ----------------------------- |
|
||||
| `/api/compliance/audit-log` | GET | Compliance audit log (last N) |
|
||||
|
||||
### v1beta (Gemini-Compatible)
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| -------------------------- | ------ | --------------------------------- |
|
||||
| `/v1beta/models` | GET | List models in Gemini format |
|
||||
| `/v1beta/models/{...path}` | POST | Gemini `generateContent` endpoint |
|
||||
|
||||
These endpoints mirror Gemini's API format for clients that expect native Gemini SDK compatibility.
|
||||
|
||||
### Internal / System APIs
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
| --------------- | ------ | ---------------------------------------------------- |
|
||||
| `/api/init` | GET | Application initialization check (used on first run) |
|
||||
| `/api/tags` | GET | Ollama-compatible model tags (for Ollama clients) |
|
||||
| `/api/restart` | POST | Trigger graceful server restart |
|
||||
| `/api/shutdown` | POST | Trigger graceful server shutdown |
|
||||
|
||||
> **Note:** These endpoints are used internally by the system or for Ollama client compatibility. They are not typically called by end users.
|
||||
|
||||
---
|
||||
|
||||
## Audio Transcription
|
||||
|
||||
```bash
|
||||
POST /v1/audio/transcriptions
|
||||
Authorization: Bearer your-api-key
|
||||
Content-Type: multipart/form-data
|
||||
```
|
||||
|
||||
Transcribe audio files using Deepgram or AssemblyAI.
|
||||
|
||||
**Request:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:20128/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-F "file=@recording.mp3" \
|
||||
-F "model=deepgram/nova-3"
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "Hello, this is the transcribed audio content.",
|
||||
"task": "transcribe",
|
||||
"language": "en",
|
||||
"duration": 12.5
|
||||
}
|
||||
```
|
||||
|
||||
**Supported providers:** `deepgram/nova-3`, `assemblyai/best`.
|
||||
|
||||
**Supported formats:** `mp3`, `wav`, `m4a`, `flac`, `ogg`, `webm`.
|
||||
|
||||
---
|
||||
|
||||
## Ollama Compatibility
|
||||
|
||||
For clients that use Ollama's API format:
|
||||
|
||||
```bash
|
||||
# Chat endpoint (Ollama format)
|
||||
POST /v1/api/chat
|
||||
|
||||
# Model listing (Ollama format)
|
||||
GET /api/tags
|
||||
```
|
||||
|
||||
Requests are automatically translated between Ollama and internal formats.
|
||||
|
||||
---
|
||||
|
||||
## Telemetry
|
||||
|
||||
```bash
|
||||
# Get latency telemetry summary (p50/p95/p99 per provider)
|
||||
GET /api/telemetry/summary
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"claudeCode": { "p50": 245, "p95": 890, "p99": 1200, "count": 150 },
|
||||
"github": { "p50": 180, "p95": 620, "p99": 950, "count": 320 }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Budget
|
||||
|
||||
```bash
|
||||
# Get budget status for all API keys
|
||||
GET /api/usage/budget
|
||||
|
||||
# Set or update a budget
|
||||
POST /api/usage/budget
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"keyId": "key-123",
|
||||
"limit": 50.00,
|
||||
"period": "monthly"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Availability
|
||||
|
||||
```bash
|
||||
# Get real-time model availability across all providers
|
||||
GET /api/models/availability
|
||||
|
||||
# Check availability for a specific model
|
||||
POST /api/models/availability
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-5-20250929"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Request Processing
|
||||
|
||||
1. Client sends request to `/v1/*`
|
||||
2. Route handler calls `handleChat`, `handleEmbedding`, `handleAudioTranscription`, or `handleImageGeneration`
|
||||
3. Model is resolved (direct provider/model or alias/combo)
|
||||
4. Credentials selected from local DB with account availability filtering
|
||||
5. For chat: `handleChatCore` — format detection, translation, cache check, idempotency check
|
||||
6. Provider executor sends upstream request
|
||||
7. Response translated back to client format (chat) or returned as-is (embeddings/images/audio)
|
||||
8. Usage/logging recorded
|
||||
9. Fallback applies on errors according to combo rules
|
||||
|
||||
Full architecture reference: [`ARCHITECTURE.md`](ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
- Dashboard routes (`/dashboard/*`) use `auth_token` cookie
|
||||
- Login uses saved password hash; fallback to `INITIAL_PASSWORD`
|
||||
- `requireLogin` toggleable via `/api/settings/require-login`
|
||||
- `/v1/*` routes optionally require Bearer API key when `REQUIRE_API_KEY=true`
|
||||
@@ -0,0 +1,787 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/ARCHITECTURE.md) · 🇪🇸 [es](../es/ARCHITECTURE.md) · 🇫🇷 [fr](../fr/ARCHITECTURE.md) · 🇩🇪 [de](../de/ARCHITECTURE.md) · 🇮🇹 [it](../it/ARCHITECTURE.md) · 🇷🇺 [ru](../ru/ARCHITECTURE.md) · 🇨🇳 [zh-CN](../zh-CN/ARCHITECTURE.md) · 🇯🇵 [ja](../ja/ARCHITECTURE.md) · 🇰🇷 [ko](../ko/ARCHITECTURE.md) · 🇸🇦 [ar](../ar/ARCHITECTURE.md) · 🇮🇳 [in](../in/ARCHITECTURE.md) · 🇹🇭 [th](../th/ARCHITECTURE.md) · 🇻🇳 [vi](../vi/ARCHITECTURE.md) · 🇮🇩 [id](../id/ARCHITECTURE.md) · 🇲🇾 [ms](../ms/ARCHITECTURE.md) · 🇳🇱 [nl](../nl/ARCHITECTURE.md) · 🇵🇱 [pl](../pl/ARCHITECTURE.md) · 🇸🇪 [sv](../sv/ARCHITECTURE.md) · 🇳🇴 [no](../no/ARCHITECTURE.md) · 🇩🇰 [da](../da/ARCHITECTURE.md) · 🇫🇮 [fi](../fi/ARCHITECTURE.md) · 🇵🇹 [pt](../pt/ARCHITECTURE.md) · 🇷🇴 [ro](../ro/ARCHITECTURE.md) · 🇭🇺 [hu](../hu/ARCHITECTURE.md) · 🇧🇬 [bg](../bg/ARCHITECTURE.md) · 🇸🇰 [sk](../sk/ARCHITECTURE.md) · 🇺🇦 [uk-UA](../uk-UA/ARCHITECTURE.md) · 🇮🇱 [he](../he/ARCHITECTURE.md) · 🇵🇭 [phi](../phi/ARCHITECTURE.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Architecture
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](ARCHITECTURE.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/ARCHITECTURE.md) | 🇪🇸 [Español](i18n/es/ARCHITECTURE.md) | 🇫🇷 [Français](i18n/fr/ARCHITECTURE.md) | 🇮🇹 [Italiano](i18n/it/ARCHITECTURE.md) | 🇷🇺 [Русский](i18n/ru/ARCHITECTURE.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/ARCHITECTURE.md) | 🇩🇪 [Deutsch](i18n/de/ARCHITECTURE.md) | 🇮🇳 [हिन्दी](i18n/in/ARCHITECTURE.md) | 🇹🇭 [ไทย](i18n/th/ARCHITECTURE.md) | 🇺🇦 [Українська](i18n/uk-UA/ARCHITECTURE.md) | 🇸🇦 [العربية](i18n/ar/ARCHITECTURE.md) | 🇯🇵 [日本語](i18n/ja/ARCHITECTURE.md) | 🇻🇳 [Tiếng Việt](i18n/vi/ARCHITECTURE.md) | 🇧🇬 [Български](i18n/bg/ARCHITECTURE.md) | 🇩🇰 [Dansk](i18n/da/ARCHITECTURE.md) | 🇫🇮 [Suomi](i18n/fi/ARCHITECTURE.md) | 🇮🇱 [עברית](i18n/he/ARCHITECTURE.md) | 🇭🇺 [Magyar](i18n/hu/ARCHITECTURE.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/ARCHITECTURE.md) | 🇰🇷 [한국어](i18n/ko/ARCHITECTURE.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/ARCHITECTURE.md) | 🇳🇱 [Nederlands](i18n/nl/ARCHITECTURE.md) | 🇳🇴 [Norsk](i18n/no/ARCHITECTURE.md) | 🇵🇹 [Português (Portugal)](i18n/pt/ARCHITECTURE.md) | 🇷🇴 [Română](i18n/ro/ARCHITECTURE.md) | 🇵🇱 [Polski](i18n/pl/ARCHITECTURE.md) | 🇸🇰 [Slovenčina](i18n/sk/ARCHITECTURE.md) | 🇸🇪 [Svenska](i18n/sv/ARCHITECTURE.md) | 🇵🇭 [Filipino](i18n/phi/ARCHITECTURE.md)
|
||||
|
||||
_Last updated: 2026-03-04_
|
||||
|
||||
## Executive Summary
|
||||
|
||||
OmniRoute is a local AI routing gateway and dashboard built on Next.js.
|
||||
It provides a single OpenAI-compatible endpoint (`/v1/*`) and routes traffic across multiple upstream providers with translation, fallback, token refresh, and usage tracking.
|
||||
|
||||
Core capabilities:
|
||||
|
||||
- OpenAI-compatible API surface for CLI/tools (28 providers)
|
||||
- Request/response translation across provider formats
|
||||
- Model combo fallback (multi-model sequence)
|
||||
- Account-level fallback (multi-account per provider)
|
||||
- OAuth + API-key provider connection management
|
||||
- Embedding generation via `/v1/embeddings` (6 providers, 9 models)
|
||||
- Image generation via `/v1/images/generations` (4 providers, 9 models)
|
||||
- Think tag parsing (`<think>...</think>`) for reasoning models
|
||||
- Response sanitization for strict OpenAI SDK compatibility
|
||||
- Role normalization (developer→system, system→user) for cross-provider compatibility
|
||||
- Structured output conversion (json_schema → Gemini responseSchema)
|
||||
- Local persistence for providers, keys, aliases, combos, settings, pricing
|
||||
- Usage/cost tracking and request logging
|
||||
- Optional cloud sync for multi-device/state sync
|
||||
- IP allowlist/blocklist for API access control
|
||||
- Thinking budget management (passthrough/auto/custom/adaptive)
|
||||
- Global system prompt injection
|
||||
- Session tracking and fingerprinting
|
||||
- Per-account enhanced rate limiting with provider-specific profiles
|
||||
- Circuit breaker pattern for provider resilience
|
||||
- Anti-thundering herd protection with mutex locking
|
||||
- Signature-based request deduplication cache
|
||||
- Domain layer: model availability, cost rules, fallback policy, lockout policy
|
||||
- Domain state persistence (SQLite write-through cache for fallbacks, budgets, lockouts, circuit breakers)
|
||||
- Policy engine for centralized request evaluation (lockout → budget → fallback)
|
||||
- Request telemetry with p50/p95/p99 latency aggregation
|
||||
- Correlation ID (X-Request-Id) for end-to-end tracing
|
||||
- Compliance audit logging with opt-out per API key
|
||||
- Eval framework for LLM quality assurance
|
||||
- Resilience UI dashboard with real-time circuit breaker status
|
||||
- Modular OAuth providers (12 individual modules under `src/lib/oauth/providers/`)
|
||||
|
||||
Primary runtime model:
|
||||
|
||||
- Next.js app routes under `src/app/api/*` implement both dashboard APIs and compatibility APIs
|
||||
- A shared SSE/routing core in `src/sse/*` + `open-sse/*` handles provider execution, translation, streaming, fallback, and usage
|
||||
|
||||
## Scope and Boundaries
|
||||
|
||||
### In Scope
|
||||
|
||||
- Local gateway runtime
|
||||
- Dashboard management APIs
|
||||
- Provider authentication and token refresh
|
||||
- Request translation and SSE streaming
|
||||
- Local state + usage persistence
|
||||
- Optional cloud sync orchestration
|
||||
|
||||
### Out of Scope
|
||||
|
||||
- Cloud service implementation behind `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Provider SLA/control plane outside local process
|
||||
- External CLI binaries themselves (Claude CLI, Codex CLI, etc.)
|
||||
|
||||
## High-Level System Context
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Clients[Developer Clients]
|
||||
C1[Claude Code]
|
||||
C2[Codex CLI]
|
||||
C3[OpenClaw / Droid / Cline / Continue / Roo]
|
||||
C4[Custom OpenAI-compatible clients]
|
||||
BROWSER[Browser Dashboard]
|
||||
end
|
||||
|
||||
subgraph Router[OmniRoute Local Process]
|
||||
API[V1 Compatibility API\n/v1/*]
|
||||
DASH[Dashboard + Management API\n/api/*]
|
||||
CORE[SSE + Translation Core\nopen-sse + src/sse]
|
||||
DB[(storage.sqlite)]
|
||||
UDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph Upstreams[Upstream Providers]
|
||||
P1[OAuth Providers\nClaude/Codex/Gemini/Qwen/iFlow/GitHub/Kiro/Cursor/Antigravity]
|
||||
P2[API Key Providers\nOpenAI/Anthropic/OpenRouter/GLM/Kimi/MiniMax\nDeepSeek/Groq/xAI/Mistral/Perplexity\nTogether/Fireworks/Cerebras/Cohere/NVIDIA]
|
||||
P3[Compatible Nodes\nOpenAI-compatible / Anthropic-compatible]
|
||||
end
|
||||
|
||||
subgraph Cloud[Optional Cloud Sync]
|
||||
CLOUD[Cloud Sync Endpoint\nNEXT_PUBLIC_CLOUD_URL]
|
||||
end
|
||||
|
||||
C1 --> API
|
||||
C2 --> API
|
||||
C3 --> API
|
||||
C4 --> API
|
||||
BROWSER --> DASH
|
||||
|
||||
API --> CORE
|
||||
DASH --> DB
|
||||
CORE --> DB
|
||||
CORE --> UDB
|
||||
|
||||
CORE --> P1
|
||||
CORE --> P2
|
||||
CORE --> P3
|
||||
|
||||
DASH --> CLOUD
|
||||
```
|
||||
|
||||
## Core Runtime Components
|
||||
|
||||
## 1) API and Routing Layer (Next.js App Routes)
|
||||
|
||||
Main directories:
|
||||
|
||||
- `src/app/api/v1/*` and `src/app/api/v1beta/*` for compatibility APIs
|
||||
- `src/app/api/*` for management/configuration APIs
|
||||
- Next rewrites in `next.config.mjs` map `/v1/*` to `/api/v1/*`
|
||||
|
||||
Important compatibility routes:
|
||||
|
||||
- `src/app/api/v1/chat/completions/route.ts`
|
||||
- `src/app/api/v1/messages/route.ts`
|
||||
- `src/app/api/v1/responses/route.ts`
|
||||
- `src/app/api/v1/models/route.ts` — includes custom models with `custom: true`
|
||||
- `src/app/api/v1/embeddings/route.ts` — embedding generation (6 providers)
|
||||
- `src/app/api/v1/images/generations/route.ts` — image generation (4+ providers incl. Antigravity/Nebius)
|
||||
- `src/app/api/v1/messages/count_tokens/route.ts`
|
||||
- `src/app/api/v1/providers/[provider]/chat/completions/route.ts` — dedicated per-provider chat
|
||||
- `src/app/api/v1/providers/[provider]/embeddings/route.ts` — dedicated per-provider embeddings
|
||||
- `src/app/api/v1/providers/[provider]/images/generations/route.ts` — dedicated per-provider images
|
||||
- `src/app/api/v1beta/models/route.ts`
|
||||
- `src/app/api/v1beta/models/[...path]/route.ts`
|
||||
|
||||
Management domains:
|
||||
|
||||
- Auth/settings: `src/app/api/auth/*`, `src/app/api/settings/*`
|
||||
- Providers/connections: `src/app/api/providers*`
|
||||
- Provider nodes: `src/app/api/provider-nodes*`
|
||||
- Custom models: `src/app/api/provider-models` (GET/POST/DELETE)
|
||||
- Model catalog: `src/app/api/models/route.ts` (GET)
|
||||
- Proxy config: `src/app/api/settings/proxy` (GET/PUT/DELETE) + `src/app/api/settings/proxy/test` (POST)
|
||||
- OAuth: `src/app/api/oauth/*`
|
||||
- Keys/aliases/combos/pricing: `src/app/api/keys*`, `src/app/api/models/alias`, `src/app/api/combos*`, `src/app/api/pricing`
|
||||
- Usage: `src/app/api/usage/*`
|
||||
- Sync/cloud: `src/app/api/sync/*`, `src/app/api/cloud/*`
|
||||
- CLI tooling helpers: `src/app/api/cli-tools/*`
|
||||
- IP filter: `src/app/api/settings/ip-filter` (GET/PUT)
|
||||
- Thinking budget: `src/app/api/settings/thinking-budget` (GET/PUT)
|
||||
- System prompt: `src/app/api/settings/system-prompt` (GET/PUT)
|
||||
- Sessions: `src/app/api/sessions` (GET)
|
||||
- Rate limits: `src/app/api/rate-limits` (GET)
|
||||
- Resilience: `src/app/api/resilience` (GET/PATCH) — provider profiles, circuit breaker, rate limit state
|
||||
- Resilience reset: `src/app/api/resilience/reset` (POST) — reset breakers + cooldowns
|
||||
- Cache stats: `src/app/api/cache/stats` (GET/DELETE)
|
||||
- Model availability: `src/app/api/models/availability` (GET/POST)
|
||||
- Telemetry: `src/app/api/telemetry/summary` (GET)
|
||||
- Budget: `src/app/api/usage/budget` (GET/POST)
|
||||
- Fallback chains: `src/app/api/fallback/chains` (GET/POST/DELETE)
|
||||
- Compliance audit: `src/app/api/compliance/audit-log` (GET)
|
||||
- Evals: `src/app/api/evals` (GET/POST), `src/app/api/evals/[suiteId]` (GET)
|
||||
- Policies: `src/app/api/policies` (GET/POST)
|
||||
|
||||
## 2) SSE + Translation Core
|
||||
|
||||
Main flow modules:
|
||||
|
||||
- Entry: `src/sse/handlers/chat.ts`
|
||||
- Core orchestration: `open-sse/handlers/chatCore.ts`
|
||||
- Provider execution adapters: `open-sse/executors/*`
|
||||
- Format detection/provider config: `open-sse/services/provider.ts`
|
||||
- Model parse/resolve: `src/sse/services/model.ts`, `open-sse/services/model.ts`
|
||||
- Account fallback logic: `open-sse/services/accountFallback.ts`
|
||||
- Translation registry: `open-sse/translator/index.ts`
|
||||
- Stream transformations: `open-sse/utils/stream.ts`, `open-sse/utils/streamHandler.ts`
|
||||
- Usage extraction/normalization: `open-sse/utils/usageTracking.ts`
|
||||
- Think tag parser: `open-sse/utils/thinkTagParser.ts`
|
||||
- Embedding handler: `open-sse/handlers/embeddings.ts`
|
||||
- Embedding provider registry: `open-sse/config/embeddingRegistry.ts`
|
||||
- Image generation handler: `open-sse/handlers/imageGeneration.ts`
|
||||
- Image provider registry: `open-sse/config/imageRegistry.ts`
|
||||
- Response sanitization: `open-sse/handlers/responseSanitizer.ts`
|
||||
- Role normalization: `open-sse/services/roleNormalizer.ts`
|
||||
|
||||
Services (business logic):
|
||||
|
||||
- Account selection/scoring: `open-sse/services/accountSelector.ts`
|
||||
- Context lifecycle management: `open-sse/services/contextManager.ts`
|
||||
- IP filter enforcement: `open-sse/services/ipFilter.ts`
|
||||
- Session tracking: `open-sse/services/sessionManager.ts`
|
||||
- Request deduplication: `open-sse/services/signatureCache.ts`
|
||||
- System prompt injection: `open-sse/services/systemPrompt.ts`
|
||||
- Thinking budget management: `open-sse/services/thinkingBudget.ts`
|
||||
- Wildcard model routing: `open-sse/services/wildcardRouter.ts`
|
||||
- Rate limit management: `open-sse/services/rateLimitManager.ts`
|
||||
- Circuit breaker: `open-sse/services/circuitBreaker.ts`
|
||||
|
||||
Domain layer modules:
|
||||
|
||||
- Model availability: `src/lib/domain/modelAvailability.ts`
|
||||
- Cost rules/budgets: `src/lib/domain/costRules.ts`
|
||||
- Fallback policy: `src/lib/domain/fallbackPolicy.ts`
|
||||
- Combo resolver: `src/lib/domain/comboResolver.ts`
|
||||
- Lockout policy: `src/lib/domain/lockoutPolicy.ts`
|
||||
- Policy engine: `src/domain/policyEngine.ts` — centralized lockout → budget → fallback evaluation
|
||||
- Error codes catalog: `src/lib/domain/errorCodes.ts`
|
||||
- Request ID: `src/lib/domain/requestId.ts`
|
||||
- Fetch timeout: `src/lib/domain/fetchTimeout.ts`
|
||||
- Request telemetry: `src/lib/domain/requestTelemetry.ts`
|
||||
- Compliance/audit: `src/lib/domain/compliance/index.ts`
|
||||
- Eval runner: `src/lib/domain/evalRunner.ts`
|
||||
- Domain state persistence: `src/lib/db/domainState.ts` — SQLite CRUD for fallback chains, budgets, cost history, lockout state, circuit breakers
|
||||
|
||||
OAuth provider modules (12 individual files under `src/lib/oauth/providers/`):
|
||||
|
||||
- Registry index: `src/lib/oauth/providers/index.ts`
|
||||
- Individual providers: `claude.ts`, `codex.ts`, `gemini.ts`, `antigravity.ts`, `iflow.ts`, `qwen.ts`, `kimi-coding.ts`, `github.ts`, `kiro.ts`, `cursor.ts`, `kilocode.ts`, `cline.ts`
|
||||
- Thin wrapper: `src/lib/oauth/providers.ts` — re-exports from individual modules
|
||||
|
||||
## 3) Persistence Layer
|
||||
|
||||
Primary state DB (SQLite):
|
||||
|
||||
- Core infra: `src/lib/db/core.ts` (better-sqlite3, migrations, WAL)
|
||||
- Re-export facade: `src/lib/localDb.ts` (thin compatibility layer for callers)
|
||||
- file: `${DATA_DIR}/storage.sqlite` (or `$XDG_CONFIG_HOME/omniroute/storage.sqlite` when set, else `~/.omniroute/storage.sqlite`)
|
||||
- entities (tables + KV namespaces): providerConnections, providerNodes, modelAliases, combos, apiKeys, settings, pricing, **customModels**, **proxyConfig**, **ipFilter**, **thinkingBudget**, **systemPrompt**
|
||||
|
||||
Usage persistence:
|
||||
|
||||
- facade: `src/lib/usageDb.ts` (decomposed modules in `src/lib/usage/*`)
|
||||
- SQLite tables in `storage.sqlite`: `usage_history`, `call_logs`, `proxy_logs`
|
||||
- optional file artifacts remain for compatibility/debug (`${DATA_DIR}/log.txt`, `${DATA_DIR}/call_logs/`, `<repo>/logs/...`)
|
||||
- legacy JSON files are migrated to SQLite by startup migrations when present
|
||||
|
||||
Domain State DB (SQLite):
|
||||
|
||||
- `src/lib/db/domainState.ts` — CRUD operations for domain state
|
||||
- Tables (created in `src/lib/db/core.ts`): `domain_fallback_chains`, `domain_budgets`, `domain_cost_history`, `domain_lockout_state`, `domain_circuit_breakers`
|
||||
- Write-through cache pattern: in-memory Maps are authoritative at runtime; mutations are written synchronously to SQLite; state is restored from DB on cold start
|
||||
|
||||
## 4) Auth + Security Surfaces
|
||||
|
||||
- Dashboard cookie auth: `src/proxy.ts`, `src/app/api/auth/login/route.ts`
|
||||
- API key generation/verification: `src/shared/utils/apiKey.ts`
|
||||
- Provider secrets persisted in `providerConnections` entries
|
||||
- Outbound proxy support via `open-sse/utils/proxyFetch.ts` (env vars) and `open-sse/utils/networkProxy.ts` (configurable per-provider or global)
|
||||
|
||||
## 5) Cloud Sync
|
||||
|
||||
- Scheduler init: `src/lib/initCloudSync.ts`, `src/shared/services/initializeCloudSync.ts`
|
||||
- Periodic task: `src/shared/services/cloudSyncScheduler.ts`
|
||||
- Control route: `src/app/api/sync/cloud/route.ts`
|
||||
|
||||
## Request Lifecycle (`/v1/chat/completions`)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant Client as CLI/SDK Client
|
||||
participant Route as /api/v1/chat/completions
|
||||
participant Chat as src/sse/handlers/chat
|
||||
participant Core as open-sse/handlers/chatCore
|
||||
participant Model as Model Resolver
|
||||
participant Auth as Credential Selector
|
||||
participant Exec as Provider Executor
|
||||
participant Prov as Upstream Provider
|
||||
participant Stream as Stream Translator
|
||||
participant Usage as usageDb
|
||||
|
||||
Client->>Route: POST /v1/chat/completions
|
||||
Route->>Chat: handleChat(request)
|
||||
Chat->>Model: parse/resolve model or combo
|
||||
|
||||
alt Combo model
|
||||
Chat->>Chat: iterate combo models (handleComboChat)
|
||||
end
|
||||
|
||||
Chat->>Auth: getProviderCredentials(provider)
|
||||
Auth-->>Chat: active account + tokens/api key
|
||||
|
||||
Chat->>Core: handleChatCore(body, modelInfo, credentials)
|
||||
Core->>Core: detect source format
|
||||
Core->>Core: translate request to target format
|
||||
Core->>Exec: execute(provider, transformedBody)
|
||||
Exec->>Prov: upstream API call
|
||||
Prov-->>Exec: SSE/JSON response
|
||||
Exec-->>Core: response + metadata
|
||||
|
||||
alt 401/403
|
||||
Core->>Exec: refreshCredentials()
|
||||
Exec-->>Core: updated tokens
|
||||
Core->>Exec: retry request
|
||||
end
|
||||
|
||||
Core->>Stream: translate/normalize stream to client format
|
||||
Stream-->>Client: SSE chunks / JSON response
|
||||
|
||||
Stream->>Usage: extract usage + persist history/log
|
||||
```
|
||||
|
||||
## Combo + Account Fallback Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A[Incoming model string] --> B{Is combo name?}
|
||||
B -- Yes --> C[Load combo models sequence]
|
||||
B -- No --> D[Single model path]
|
||||
|
||||
C --> E[Try model N]
|
||||
E --> F[Resolve provider/model]
|
||||
D --> F
|
||||
|
||||
F --> G[Select account credentials]
|
||||
G --> H{Credentials available?}
|
||||
H -- No --> I[Return provider unavailable]
|
||||
H -- Yes --> J[Execute request]
|
||||
|
||||
J --> K{Success?}
|
||||
K -- Yes --> L[Return response]
|
||||
K -- No --> M{Fallback-eligible error?}
|
||||
|
||||
M -- No --> N[Return error]
|
||||
M -- Yes --> O[Mark account unavailable cooldown]
|
||||
O --> P{Another account for provider?}
|
||||
P -- Yes --> G
|
||||
P -- No --> Q{In combo with next model?}
|
||||
Q -- Yes --> E
|
||||
Q -- No --> R[Return all unavailable]
|
||||
```
|
||||
|
||||
Fallback decisions are driven by `open-sse/services/accountFallback.ts` using status codes and error-message heuristics.
|
||||
|
||||
## OAuth Onboarding and Token Refresh Lifecycle
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Dashboard UI
|
||||
participant OAuth as /api/oauth/[provider]/[action]
|
||||
participant ProvAuth as Provider Auth Server
|
||||
participant DB as localDb
|
||||
participant Test as /api/providers/[id]/test
|
||||
participant Exec as Provider Executor
|
||||
|
||||
UI->>OAuth: GET authorize or device-code
|
||||
OAuth->>ProvAuth: create auth/device flow
|
||||
ProvAuth-->>OAuth: auth URL or device code payload
|
||||
OAuth-->>UI: flow data
|
||||
|
||||
UI->>OAuth: POST exchange or poll
|
||||
OAuth->>ProvAuth: token exchange/poll
|
||||
ProvAuth-->>OAuth: access/refresh tokens
|
||||
OAuth->>DB: createProviderConnection(oauth data)
|
||||
OAuth-->>UI: success + connection id
|
||||
|
||||
UI->>Test: POST /api/providers/[id]/test
|
||||
Test->>Exec: validate credentials / optional refresh
|
||||
Exec-->>Test: valid or refreshed token info
|
||||
Test->>DB: update status/tokens/errors
|
||||
Test-->>UI: validation result
|
||||
```
|
||||
|
||||
Refresh during live traffic is executed inside `open-sse/handlers/chatCore.ts` via executor `refreshCredentials()`.
|
||||
|
||||
## Cloud Sync Lifecycle (Enable / Sync / Disable)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant UI as Endpoint Page UI
|
||||
participant Sync as /api/sync/cloud
|
||||
participant DB as localDb
|
||||
participant Cloud as External Cloud Sync
|
||||
participant Claude as ~/.claude/settings.json
|
||||
|
||||
UI->>Sync: POST action=enable
|
||||
Sync->>DB: set cloudEnabled=true
|
||||
Sync->>DB: ensure API key exists
|
||||
Sync->>Cloud: POST /sync/{machineId} (providers/aliases/combos/keys)
|
||||
Cloud-->>Sync: sync result
|
||||
Sync->>Cloud: GET /{machineId}/v1/verify
|
||||
Sync-->>UI: enabled + verification status
|
||||
|
||||
UI->>Sync: POST action=sync
|
||||
Sync->>Cloud: POST /sync/{machineId}
|
||||
Cloud-->>Sync: remote data
|
||||
Sync->>DB: update newer local tokens/status
|
||||
Sync-->>UI: synced
|
||||
|
||||
UI->>Sync: POST action=disable
|
||||
Sync->>DB: set cloudEnabled=false
|
||||
Sync->>Cloud: DELETE /sync/{machineId}
|
||||
Sync->>Claude: switch ANTHROPIC_BASE_URL back to local (if needed)
|
||||
Sync-->>UI: disabled
|
||||
```
|
||||
|
||||
Periodic sync is triggered by `CloudSyncScheduler` when cloud is enabled.
|
||||
|
||||
## Data Model and Storage Map
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
SETTINGS ||--o{ PROVIDER_CONNECTION : controls
|
||||
PROVIDER_NODE ||--o{ PROVIDER_CONNECTION : backs_compatible_provider
|
||||
PROVIDER_CONNECTION ||--o{ USAGE_ENTRY : emits_usage
|
||||
|
||||
SETTINGS {
|
||||
boolean cloudEnabled
|
||||
number stickyRoundRobinLimit
|
||||
boolean requireLogin
|
||||
string password_hash
|
||||
string fallbackStrategy
|
||||
json rateLimitDefaults
|
||||
json providerProfiles
|
||||
}
|
||||
|
||||
PROVIDER_CONNECTION {
|
||||
string id
|
||||
string provider
|
||||
string authType
|
||||
string name
|
||||
number priority
|
||||
boolean isActive
|
||||
string apiKey
|
||||
string accessToken
|
||||
string refreshToken
|
||||
string expiresAt
|
||||
string testStatus
|
||||
string lastError
|
||||
string rateLimitedUntil
|
||||
json providerSpecificData
|
||||
}
|
||||
|
||||
PROVIDER_NODE {
|
||||
string id
|
||||
string type
|
||||
string name
|
||||
string prefix
|
||||
string apiType
|
||||
string baseUrl
|
||||
}
|
||||
|
||||
MODEL_ALIAS {
|
||||
string alias
|
||||
string targetModel
|
||||
}
|
||||
|
||||
COMBO {
|
||||
string id
|
||||
string name
|
||||
string[] models
|
||||
}
|
||||
|
||||
API_KEY {
|
||||
string id
|
||||
string name
|
||||
string key
|
||||
string machineId
|
||||
}
|
||||
|
||||
USAGE_ENTRY {
|
||||
string provider
|
||||
string model
|
||||
number prompt_tokens
|
||||
number completion_tokens
|
||||
string connectionId
|
||||
string timestamp
|
||||
}
|
||||
|
||||
CUSTOM_MODEL {
|
||||
string id
|
||||
string name
|
||||
string providerId
|
||||
}
|
||||
|
||||
PROXY_CONFIG {
|
||||
string global
|
||||
json providers
|
||||
}
|
||||
|
||||
IP_FILTER {
|
||||
string mode
|
||||
string[] allowlist
|
||||
string[] blocklist
|
||||
}
|
||||
|
||||
THINKING_BUDGET {
|
||||
string mode
|
||||
number customBudget
|
||||
string effortLevel
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT {
|
||||
boolean enabled
|
||||
string prompt
|
||||
string position
|
||||
}
|
||||
```
|
||||
|
||||
Physical storage files:
|
||||
|
||||
- primary runtime DB: `${DATA_DIR}/storage.sqlite`
|
||||
- request log lines: `${DATA_DIR}/log.txt` (compat/debug artifact)
|
||||
- structured call payload archives: `${DATA_DIR}/call_logs/`
|
||||
- optional translator/request debug sessions: `<repo>/logs/...`
|
||||
|
||||
## Deployment Topology
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph LocalHost[Developer Host]
|
||||
CLI[CLI Tools]
|
||||
Browser[Dashboard Browser]
|
||||
end
|
||||
|
||||
subgraph ContainerOrProcess[OmniRoute Runtime]
|
||||
Next[Next.js Server\nPORT=20128]
|
||||
Core[SSE Core + Executors]
|
||||
MainDB[(storage.sqlite)]
|
||||
UsageDB[(usage tables + log artifacts)]
|
||||
end
|
||||
|
||||
subgraph External[External Services]
|
||||
Providers[AI Providers]
|
||||
SyncCloud[Cloud Sync Service]
|
||||
end
|
||||
|
||||
CLI --> Next
|
||||
Browser --> Next
|
||||
Next --> Core
|
||||
Next --> MainDB
|
||||
Core --> MainDB
|
||||
Core --> UsageDB
|
||||
Core --> Providers
|
||||
Next --> SyncCloud
|
||||
```
|
||||
|
||||
## Module Mapping (Decision-Critical)
|
||||
|
||||
### Route and API Modules
|
||||
|
||||
- `src/app/api/v1/*`, `src/app/api/v1beta/*`: compatibility APIs
|
||||
- `src/app/api/v1/providers/[provider]/*`: dedicated per-provider routes (chat, embeddings, images)
|
||||
- `src/app/api/providers*`: provider CRUD, validation, testing
|
||||
- `src/app/api/provider-nodes*`: custom compatible node management
|
||||
- `src/app/api/provider-models`: custom model management (CRUD)
|
||||
- `src/app/api/models/route.ts`: model catalog API (aliases + custom models)
|
||||
- `src/app/api/oauth/*`: OAuth/device-code flows
|
||||
- `src/app/api/keys*`: local API key lifecycle
|
||||
- `src/app/api/models/alias`: alias management
|
||||
- `src/app/api/combos*`: fallback combo management
|
||||
- `src/app/api/pricing`: pricing overrides for cost calculation
|
||||
- `src/app/api/settings/proxy`: proxy configuration (GET/PUT/DELETE)
|
||||
- `src/app/api/settings/proxy/test`: outbound proxy connectivity test (POST)
|
||||
- `src/app/api/usage/*`: usage and logs APIs
|
||||
- `src/app/api/sync/*` + `src/app/api/cloud/*`: cloud sync and cloud-facing helpers
|
||||
- `src/app/api/cli-tools/*`: local CLI config writers/checkers
|
||||
- `src/app/api/settings/ip-filter`: IP allowlist/blocklist (GET/PUT)
|
||||
- `src/app/api/settings/thinking-budget`: thinking token budget config (GET/PUT)
|
||||
- `src/app/api/settings/system-prompt`: global system prompt (GET/PUT)
|
||||
- `src/app/api/sessions`: active session listing (GET)
|
||||
- `src/app/api/rate-limits`: per-account rate limit status (GET)
|
||||
|
||||
### Routing and Execution Core
|
||||
|
||||
- `src/sse/handlers/chat.ts`: request parse, combo handling, account selection loop
|
||||
- `open-sse/handlers/chatCore.ts`: translation, executor dispatch, retry/refresh handling, stream setup
|
||||
- `open-sse/executors/*`: provider-specific network and format behavior
|
||||
|
||||
### Translation Registry and Format Converters
|
||||
|
||||
- `open-sse/translator/index.ts`: translator registry and orchestration
|
||||
- Request translators: `open-sse/translator/request/*`
|
||||
- Response translators: `open-sse/translator/response/*`
|
||||
- Format constants: `open-sse/translator/formats.ts`
|
||||
|
||||
### Persistence
|
||||
|
||||
- `src/lib/db/*`: persistent config/state and domain persistence on SQLite
|
||||
- `src/lib/localDb.ts`: compatibility re-export for DB modules
|
||||
- `src/lib/usageDb.ts`: usage history/call logs facade on top of SQLite tables
|
||||
|
||||
## Provider Executor Coverage (Strategy Pattern)
|
||||
|
||||
Each provider has a specialized executor extending `BaseExecutor` (in `open-sse/executors/base.ts`), which provides URL building, header construction, retry with exponential backoff, credential refresh hooks, and the `execute()` orchestration method.
|
||||
|
||||
| Executor | Provider(s) | Special Handling |
|
||||
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------- |
|
||||
| `DefaultExecutor` | OpenAI, Claude, Gemini, Qwen, iFlow, OpenRouter, GLM, Kimi, MiniMax, DeepSeek, Groq, xAI, Mistral, Perplexity, Together, Fireworks, Cerebras, Cohere, NVIDIA | Dynamic URL/header config per provider |
|
||||
| `AntigravityExecutor` | Google Antigravity | Custom project/session IDs, Retry-After parsing |
|
||||
| `CodexExecutor` | OpenAI Codex | Injects system instructions, forces reasoning effort |
|
||||
| `CursorExecutor` | Cursor IDE | ConnectRPC protocol, Protobuf encoding, request signing via checksum |
|
||||
| `GithubExecutor` | GitHub Copilot | Copilot token refresh, VSCode-mimicking headers |
|
||||
| `KiroExecutor` | AWS CodeWhisperer/Kiro | AWS EventStream binary format → SSE conversion |
|
||||
| `GeminiCLIExecutor` | Gemini CLI | Google OAuth token refresh cycle |
|
||||
|
||||
All other providers (including custom compatible nodes) use the `DefaultExecutor`.
|
||||
|
||||
## Provider Compatibility Matrix
|
||||
|
||||
| Provider | Format | Auth | Stream | Non-Stream | Token Refresh | Usage API |
|
||||
| ---------------- | ---------------- | --------------------- | ---------------- | ---------- | ------------- | ------------------ |
|
||||
| Claude | claude | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Admin only |
|
||||
| Gemini | gemini | API Key / OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Gemini CLI | gemini-cli | OAuth | ✅ | ✅ | ✅ | ⚠️ Cloud Console |
|
||||
| Antigravity | antigravity | OAuth | ✅ | ✅ | ✅ | ✅ Full quota API |
|
||||
| OpenAI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Codex | openai-responses | OAuth | ✅ forced | ❌ | ✅ | ✅ Rate limits |
|
||||
| GitHub Copilot | openai | OAuth + Copilot Token | ✅ | ✅ | ✅ | ✅ Quota snapshots |
|
||||
| Cursor | cursor | Custom checksum | ✅ | ✅ | ❌ | ❌ |
|
||||
| Kiro | kiro | AWS SSO OIDC | ✅ (EventStream) | ❌ | ✅ | ✅ Usage limits |
|
||||
| Qwen | openai | OAuth | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| iFlow | openai | OAuth (Basic) | ✅ | ✅ | ✅ | ⚠️ Per request |
|
||||
| OpenRouter | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| GLM/Kimi/MiniMax | claude | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| DeepSeek | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Groq | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| xAI (Grok) | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Mistral | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Perplexity | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Together AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Fireworks AI | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cerebras | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| Cohere | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
| NVIDIA NIM | openai | API Key | ✅ | ✅ | ❌ | ❌ |
|
||||
|
||||
## Format Translation Coverage
|
||||
|
||||
Detected source formats include:
|
||||
|
||||
- `openai`
|
||||
- `openai-responses`
|
||||
- `claude`
|
||||
- `gemini`
|
||||
|
||||
Target formats include:
|
||||
|
||||
- OpenAI chat/Responses
|
||||
- Claude
|
||||
- Gemini/Gemini-CLI/Antigravity envelope
|
||||
- Kiro
|
||||
- Cursor
|
||||
|
||||
Translations use **OpenAI as the hub format** — all conversions go through OpenAI as intermediate:
|
||||
|
||||
```
|
||||
Source Format → OpenAI (hub) → Target Format
|
||||
```
|
||||
|
||||
Translations are selected dynamically based on source payload shape and provider target format.
|
||||
|
||||
Additional processing layers in the translation pipeline:
|
||||
|
||||
- **Response sanitization** — Strips non-standard fields from OpenAI-format responses (both streaming and non-streaming) to ensure strict SDK compliance
|
||||
- **Role normalization** — Converts `developer` → `system` for non-OpenAI targets; merges `system` → `user` for models that reject the system role (GLM, ERNIE)
|
||||
- **Think tag extraction** — Parses `<think>...</think>` blocks from content into `reasoning_content` field
|
||||
- **Structured output** — Converts OpenAI `response_format.json_schema` to Gemini's `responseMimeType` + `responseSchema`
|
||||
|
||||
## Supported API Endpoints
|
||||
|
||||
| Endpoint | Format | Handler |
|
||||
| -------------------------------------------------- | ------------------ | ---------------------------------------------------- |
|
||||
| `POST /v1/chat/completions` | OpenAI Chat | `src/sse/handlers/chat.ts` |
|
||||
| `POST /v1/messages` | Claude Messages | Same handler (auto-detected) |
|
||||
| `POST /v1/responses` | OpenAI Responses | `open-sse/handlers/responsesHandler.ts` |
|
||||
| `POST /v1/embeddings` | OpenAI Embeddings | `open-sse/handlers/embeddings.ts` |
|
||||
| `GET /v1/embeddings` | Model listing | API route |
|
||||
| `POST /v1/images/generations` | OpenAI Images | `open-sse/handlers/imageGeneration.ts` |
|
||||
| `GET /v1/images/generations` | Model listing | API route |
|
||||
| `POST /v1/providers/{provider}/chat/completions` | OpenAI Chat | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/embeddings` | OpenAI Embeddings | Dedicated per-provider with model validation |
|
||||
| `POST /v1/providers/{provider}/images/generations` | OpenAI Images | Dedicated per-provider with model validation |
|
||||
| `POST /v1/messages/count_tokens` | Claude Token Count | API route |
|
||||
| `GET /v1/models` | OpenAI Models list | API route (chat + embedding + image + custom models) |
|
||||
| `GET /api/models/catalog` | Catalog | All models grouped by provider + type |
|
||||
| `POST /v1beta/models/*:streamGenerateContent` | Gemini native | API route |
|
||||
| `GET/PUT/DELETE /api/settings/proxy` | Proxy Config | Network proxy configuration |
|
||||
| `POST /api/settings/proxy/test` | Proxy Connectivity | Proxy health/connectivity test endpoint |
|
||||
| `GET/POST/DELETE /api/provider-models` | Custom Models | Custom model management per provider |
|
||||
|
||||
## Bypass Handler
|
||||
|
||||
The bypass handler (`open-sse/utils/bypassHandler.ts`) intercepts known "throwaway" requests from Claude CLI — warmup pings, title extractions, and token counts — and returns a **fake response** without consuming upstream provider tokens. This is triggered only when `User-Agent` contains `claude-cli`.
|
||||
|
||||
## Request Logger Pipeline
|
||||
|
||||
The request logger (`open-sse/utils/requestLogger.ts`) provides a 7-stage debug logging pipeline, disabled by default, enabled via `ENABLE_REQUEST_LOGS=true`:
|
||||
|
||||
```
|
||||
1_req_client.json → 2_req_source.json → 3_req_openai.json → 4_req_target.json
|
||||
→ 5_res_provider.txt → 6_res_openai.txt → 7_res_client.txt
|
||||
```
|
||||
|
||||
Files are written to `<repo>/logs/<session>/` for each request session.
|
||||
|
||||
## Failure Modes and Resilience
|
||||
|
||||
## 1) Account/Provider Availability
|
||||
|
||||
- provider account cooldown on transient/rate/auth errors
|
||||
- account fallback before failing request
|
||||
- combo model fallback when current model/provider path is exhausted
|
||||
|
||||
## 2) Token Expiry
|
||||
|
||||
- pre-check and refresh with retry for refreshable providers
|
||||
- 401/403 retry after refresh attempt in core path
|
||||
|
||||
## 3) Stream Safety
|
||||
|
||||
- disconnect-aware stream controller
|
||||
- translation stream with end-of-stream flush and `[DONE]` handling
|
||||
- usage estimation fallback when provider usage metadata is missing
|
||||
|
||||
## 4) Cloud Sync Degradation
|
||||
|
||||
- sync errors are surfaced but local runtime continues
|
||||
- scheduler has retry-capable logic, but periodic execution currently calls single-attempt sync by default
|
||||
|
||||
## 5) Data Integrity
|
||||
|
||||
- SQLite schema migrations and auto-upgrade hooks at startup
|
||||
- legacy JSON → SQLite migration compatibility path
|
||||
|
||||
## Observability and Operational Signals
|
||||
|
||||
Runtime visibility sources:
|
||||
|
||||
- console logs from `src/sse/utils/logger.ts`
|
||||
- per-request usage aggregates in SQLite (`usage_history`, `call_logs`, `proxy_logs`)
|
||||
- textual request status log in `log.txt` (optional/compat)
|
||||
- optional deep request/translation logs under `logs/` when `ENABLE_REQUEST_LOGS=true`
|
||||
- dashboard usage endpoints (`/api/usage/*`) for UI consumption
|
||||
|
||||
## Security-Sensitive Boundaries
|
||||
|
||||
- JWT secret (`JWT_SECRET`) secures dashboard session cookie verification/signing
|
||||
- Initial password bootstrap (`INITIAL_PASSWORD`) should be explicitly configured for first-run provisioning
|
||||
- API key HMAC secret (`API_KEY_SECRET`) secures generated local API key format
|
||||
- Provider secrets (API keys/tokens) are persisted in local DB and should be protected at filesystem level
|
||||
- Cloud sync endpoints rely on API key auth + machine id semantics
|
||||
|
||||
## Environment and Runtime Matrix
|
||||
|
||||
Environment variables actively used by code:
|
||||
|
||||
- App/auth: `JWT_SECRET`, `INITIAL_PASSWORD`
|
||||
- Storage: `DATA_DIR`
|
||||
- Compatible node behavior: `ALLOW_MULTI_CONNECTIONS_PER_COMPAT_NODE`
|
||||
- Optional storage base override (Linux/macOS when `DATA_DIR` unset): `XDG_CONFIG_HOME`
|
||||
- Security hashing: `API_KEY_SECRET`, `MACHINE_ID_SALT`
|
||||
- Logging: `ENABLE_REQUEST_LOGS`
|
||||
- Sync/cloud URLing: `NEXT_PUBLIC_BASE_URL`, `NEXT_PUBLIC_CLOUD_URL`
|
||||
- Outbound proxy: `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY` and lowercase variants
|
||||
- SOCKS5 feature flags: `ENABLE_SOCKS5_PROXY`, `NEXT_PUBLIC_ENABLE_SOCKS5_PROXY`
|
||||
- Platform/runtime helpers (not app-specific config): `APPDATA`, `NODE_ENV`, `PORT`, `HOSTNAME`
|
||||
|
||||
## Known Architectural Notes
|
||||
|
||||
1. `usageDb` and `localDb` share the same base directory policy (`DATA_DIR` -> `XDG_CONFIG_HOME/omniroute` -> `~/.omniroute`) with legacy file migration.
|
||||
2. `/api/v1/route.ts` delegates to the same unified catalog builder used by `/api/v1/models` (`src/app/api/v1/models/catalog.ts`) to avoid semantic drift.
|
||||
3. Request logger writes full headers/body when enabled; treat log directory as sensitive.
|
||||
4. Cloud behavior depends on correct `NEXT_PUBLIC_BASE_URL` and cloud endpoint reachability.
|
||||
5. The `open-sse/` directory is published as the `@omniroute/open-sse` **npm workspace package**. Source code imports it via `@omniroute/open-sse/...` (resolved by Next.js `transpilePackages`). File paths in this document still use the directory name `open-sse/` for consistency.
|
||||
6. Charts in the dashboard use **Recharts** (SVG-based) for accessible, interactive analytics visualizations (model usage bar charts, provider breakdown tables with success rates).
|
||||
7. E2E tests use **Playwright** (`tests/e2e/`), run via `npm run test:e2e`. Unit tests use **Node.js test runner** (`tests/unit/`), run via `npm run test:unit`. Source code under `src/` is **TypeScript** (`.ts`/`.tsx`); the `open-sse/` workspace remains JavaScript (`.js`).
|
||||
8. Settings page is organized into 5 tabs: Security, Routing (6 global strategies: fill-first, round-robin, p2c, random, least-used, cost-optimized), Resilience (editable rate limits, circuit breaker, policies), AI (thinking budget, system prompt, prompt cache), Advanced (proxy).
|
||||
|
||||
## Operational Verification Checklist
|
||||
|
||||
- Build from source: `npm run build`
|
||||
- Build Docker image: `docker build -t omniroute .`
|
||||
- Start service and verify:
|
||||
- `GET /api/settings`
|
||||
- `GET /api/v1/models`
|
||||
- CLI target base URL should be `http://<host>:20128/v1` when `PORT=20128`
|
||||
@@ -0,0 +1,67 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/AUTO-COMBO.md) · 🇪🇸 [es](../es/AUTO-COMBO.md) · 🇫🇷 [fr](../fr/AUTO-COMBO.md) · 🇩🇪 [de](../de/AUTO-COMBO.md) · 🇮🇹 [it](../it/AUTO-COMBO.md) · 🇷🇺 [ru](../ru/AUTO-COMBO.md) · 🇨🇳 [zh-CN](../zh-CN/AUTO-COMBO.md) · 🇯🇵 [ja](../ja/AUTO-COMBO.md) · 🇰🇷 [ko](../ko/AUTO-COMBO.md) · 🇸🇦 [ar](../ar/AUTO-COMBO.md) · 🇮🇳 [in](../in/AUTO-COMBO.md) · 🇹🇭 [th](../th/AUTO-COMBO.md) · 🇻🇳 [vi](../vi/AUTO-COMBO.md) · 🇮🇩 [id](../id/AUTO-COMBO.md) · 🇲🇾 [ms](../ms/AUTO-COMBO.md) · 🇳🇱 [nl](../nl/AUTO-COMBO.md) · 🇵🇱 [pl](../pl/AUTO-COMBO.md) · 🇸🇪 [sv](../sv/AUTO-COMBO.md) · 🇳🇴 [no](../no/AUTO-COMBO.md) · 🇩🇰 [da](../da/AUTO-COMBO.md) · 🇫🇮 [fi](../fi/AUTO-COMBO.md) · 🇵🇹 [pt](../pt/AUTO-COMBO.md) · 🇷🇴 [ro](../ro/AUTO-COMBO.md) · 🇭🇺 [hu](../hu/AUTO-COMBO.md) · 🇧🇬 [bg](../bg/AUTO-COMBO.md) · 🇸🇰 [sk](../sk/AUTO-COMBO.md) · 🇺🇦 [uk-UA](../uk-UA/AUTO-COMBO.md) · 🇮🇱 [he](../he/AUTO-COMBO.md) · 🇵🇭 [phi](../phi/AUTO-COMBO.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute Auto-Combo Engine
|
||||
|
||||
> Self-managing model chains with adaptive scoring
|
||||
|
||||
## How It Works
|
||||
|
||||
The Auto-Combo Engine dynamically selects the best provider/model for each request using a **6-factor scoring function**:
|
||||
|
||||
| Factor | Weight | Description |
|
||||
| :--------- | :----- | :---------------------------------------------- |
|
||||
| Quota | 0.20 | Remaining capacity [0..1] |
|
||||
| Health | 0.25 | Circuit breaker: CLOSED=1.0, HALF=0.5, OPEN=0.0 |
|
||||
| CostInv | 0.20 | Inverse cost (cheaper = higher score) |
|
||||
| LatencyInv | 0.15 | Inverse p95 latency (faster = higher) |
|
||||
| TaskFit | 0.10 | Model × task type fitness score |
|
||||
| Stability | 0.10 | Low variance in latency/errors |
|
||||
|
||||
## Mode Packs
|
||||
|
||||
| Pack | Focus | Key Weight |
|
||||
| :---------------------- | :----------- | :--------------- |
|
||||
| 🚀 **Ship Fast** | Speed | latencyInv: 0.35 |
|
||||
| 💰 **Cost Saver** | Economy | costInv: 0.40 |
|
||||
| 🎯 **Quality First** | Best model | taskFit: 0.40 |
|
||||
| 📡 **Offline Friendly** | Availability | quota: 0.40 |
|
||||
|
||||
## Self-Healing
|
||||
|
||||
- **Temporary exclusion**: Score < 0.2 → excluded for 5 min (progressive backoff, max 30 min)
|
||||
- **Circuit breaker awareness**: OPEN → auto-excluded; HALF_OPEN → probe requests
|
||||
- **Incident mode**: >50% OPEN → disable exploration, maximize stability
|
||||
- **Cooldown recovery**: After exclusion, first request is a "probe" with reduced timeout
|
||||
|
||||
## Bandit Exploration
|
||||
|
||||
5% of requests (configurable) are routed to random providers for exploration. Disabled in incident mode.
|
||||
|
||||
## API
|
||||
|
||||
```bash
|
||||
# Create auto-combo
|
||||
curl -X POST http://localhost:20128/api/combos/auto \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"id":"my-auto","name":"Auto Coder","candidatePool":["anthropic","google","openai"],"modePack":"ship-fast"}'
|
||||
|
||||
# List auto-combos
|
||||
curl http://localhost:20128/api/combos/auto
|
||||
```
|
||||
|
||||
## Task Fitness
|
||||
|
||||
30+ models scored across 6 task types (`coding`, `review`, `planning`, `analysis`, `debugging`, `documentation`). Supports wildcard patterns (e.g., `*-coder` → high coding score).
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------ |
|
||||
| `open-sse/services/autoCombo/scoring.ts` | Scoring function & pool normalization |
|
||||
| `open-sse/services/autoCombo/taskFitness.ts` | Model × task fitness lookup |
|
||||
| `open-sse/services/autoCombo/engine.ts` | Selection logic, bandit, budget cap |
|
||||
| `open-sse/services/autoCombo/selfHealing.ts` | Exclusion, probes, incident mode |
|
||||
| `open-sse/services/autoCombo/modePacks.ts` | 4 weight profiles |
|
||||
| `src/app/api/combos/auto/route.ts` | REST API |
|
||||
@@ -0,0 +1,593 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/CODEBASE_DOCUMENTATION.md) · 🇪🇸 [es](../es/CODEBASE_DOCUMENTATION.md) · 🇫🇷 [fr](../fr/CODEBASE_DOCUMENTATION.md) · 🇩🇪 [de](../de/CODEBASE_DOCUMENTATION.md) · 🇮🇹 [it](../it/CODEBASE_DOCUMENTATION.md) · 🇷🇺 [ru](../ru/CODEBASE_DOCUMENTATION.md) · 🇨🇳 [zh-CN](../zh-CN/CODEBASE_DOCUMENTATION.md) · 🇯🇵 [ja](../ja/CODEBASE_DOCUMENTATION.md) · 🇰🇷 [ko](../ko/CODEBASE_DOCUMENTATION.md) · 🇸🇦 [ar](../ar/CODEBASE_DOCUMENTATION.md) · 🇮🇳 [in](../in/CODEBASE_DOCUMENTATION.md) · 🇹🇭 [th](../th/CODEBASE_DOCUMENTATION.md) · 🇻🇳 [vi](../vi/CODEBASE_DOCUMENTATION.md) · 🇮🇩 [id](../id/CODEBASE_DOCUMENTATION.md) · 🇲🇾 [ms](../ms/CODEBASE_DOCUMENTATION.md) · 🇳🇱 [nl](../nl/CODEBASE_DOCUMENTATION.md) · 🇵🇱 [pl](../pl/CODEBASE_DOCUMENTATION.md) · 🇸🇪 [sv](../sv/CODEBASE_DOCUMENTATION.md) · 🇳🇴 [no](../no/CODEBASE_DOCUMENTATION.md) · 🇩🇰 [da](../da/CODEBASE_DOCUMENTATION.md) · 🇫🇮 [fi](../fi/CODEBASE_DOCUMENTATION.md) · 🇵🇹 [pt](../pt/CODEBASE_DOCUMENTATION.md) · 🇷🇴 [ro](../ro/CODEBASE_DOCUMENTATION.md) · 🇭🇺 [hu](../hu/CODEBASE_DOCUMENTATION.md) · 🇧🇬 [bg](../bg/CODEBASE_DOCUMENTATION.md) · 🇸🇰 [sk](../sk/CODEBASE_DOCUMENTATION.md) · 🇺🇦 [uk-UA](../uk-UA/CODEBASE_DOCUMENTATION.md) · 🇮🇱 [he](../he/CODEBASE_DOCUMENTATION.md) · 🇵🇭 [phi](../phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
---
|
||||
|
||||
# omniroute — Codebase Documentation
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](CODEBASE_DOCUMENTATION.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/CODEBASE_DOCUMENTATION.md) | 🇪🇸 [Español](i18n/es/CODEBASE_DOCUMENTATION.md) | 🇫🇷 [Français](i18n/fr/CODEBASE_DOCUMENTATION.md) | 🇮🇹 [Italiano](i18n/it/CODEBASE_DOCUMENTATION.md) | 🇷🇺 [Русский](i18n/ru/CODEBASE_DOCUMENTATION.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/CODEBASE_DOCUMENTATION.md) | 🇩🇪 [Deutsch](i18n/de/CODEBASE_DOCUMENTATION.md) | 🇮🇳 [हिन्दी](i18n/in/CODEBASE_DOCUMENTATION.md) | 🇹🇭 [ไทย](i18n/th/CODEBASE_DOCUMENTATION.md) | 🇺🇦 [Українська](i18n/uk-UA/CODEBASE_DOCUMENTATION.md) | 🇸🇦 [العربية](i18n/ar/CODEBASE_DOCUMENTATION.md) | 🇯🇵 [日本語](i18n/ja/CODEBASE_DOCUMENTATION.md) | 🇻🇳 [Tiếng Việt](i18n/vi/CODEBASE_DOCUMENTATION.md) | 🇧🇬 [Български](i18n/bg/CODEBASE_DOCUMENTATION.md) | 🇩🇰 [Dansk](i18n/da/CODEBASE_DOCUMENTATION.md) | 🇫🇮 [Suomi](i18n/fi/CODEBASE_DOCUMENTATION.md) | 🇮🇱 [עברית](i18n/he/CODEBASE_DOCUMENTATION.md) | 🇭🇺 [Magyar](i18n/hu/CODEBASE_DOCUMENTATION.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/CODEBASE_DOCUMENTATION.md) | 🇰🇷 [한국어](i18n/ko/CODEBASE_DOCUMENTATION.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/CODEBASE_DOCUMENTATION.md) | 🇳🇱 [Nederlands](i18n/nl/CODEBASE_DOCUMENTATION.md) | 🇳🇴 [Norsk](i18n/no/CODEBASE_DOCUMENTATION.md) | 🇵🇹 [Português (Portugal)](i18n/pt/CODEBASE_DOCUMENTATION.md) | 🇷🇴 [Română](i18n/ro/CODEBASE_DOCUMENTATION.md) | 🇵🇱 [Polski](i18n/pl/CODEBASE_DOCUMENTATION.md) | 🇸🇰 [Slovenčina](i18n/sk/CODEBASE_DOCUMENTATION.md) | 🇸🇪 [Svenska](i18n/sv/CODEBASE_DOCUMENTATION.md) | 🇵🇭 [Filipino](i18n/phi/CODEBASE_DOCUMENTATION.md)
|
||||
|
||||
> A comprehensive, beginner-friendly guide to the **omniroute** multi-provider AI proxy router.
|
||||
|
||||
---
|
||||
|
||||
## 1. What Is omniroute?
|
||||
|
||||
omniroute is a **proxy router** that sits between AI clients (Claude CLI, Codex, Cursor IDE, etc.) and AI providers (Anthropic, Google, OpenAI, AWS, GitHub, etc.). It solves one big problem:
|
||||
|
||||
> **Different AI clients speak different "languages" (API formats), and different AI providers expect different "languages" too.** omniroute translates between them automatically.
|
||||
|
||||
Think of it like a universal translator at the United Nations — any delegate can speak any language, and the translator converts it for any other delegate.
|
||||
|
||||
---
|
||||
|
||||
## 2. Architecture Overview
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph Clients
|
||||
A[Claude CLI]
|
||||
B[Codex]
|
||||
C[Cursor IDE]
|
||||
D[OpenAI-compatible]
|
||||
end
|
||||
|
||||
subgraph omniroute
|
||||
E[Handler Layer]
|
||||
F[Translator Layer]
|
||||
G[Executor Layer]
|
||||
H[Services Layer]
|
||||
end
|
||||
|
||||
subgraph Providers
|
||||
I[Anthropic Claude]
|
||||
J[Google Gemini]
|
||||
K[OpenAI / Codex]
|
||||
L[GitHub Copilot]
|
||||
M[AWS Kiro]
|
||||
N[Antigravity]
|
||||
O[Cursor API]
|
||||
end
|
||||
|
||||
A --> E
|
||||
B --> E
|
||||
C --> E
|
||||
D --> E
|
||||
E --> F
|
||||
F --> G
|
||||
G --> I
|
||||
G --> J
|
||||
G --> K
|
||||
G --> L
|
||||
G --> M
|
||||
G --> N
|
||||
G --> O
|
||||
H -.-> E
|
||||
H -.-> G
|
||||
```
|
||||
|
||||
### Core Principle: Hub-and-Spoke Translation
|
||||
|
||||
All format translation passes through **OpenAI format as the hub**:
|
||||
|
||||
```
|
||||
Client Format → [OpenAI Hub] → Provider Format (request)
|
||||
Provider Format → [OpenAI Hub] → Client Format (response)
|
||||
```
|
||||
|
||||
This means you only need **N translators** (one per format) instead of **N²** (every pair).
|
||||
|
||||
---
|
||||
|
||||
## 3. Project Structure
|
||||
|
||||
```
|
||||
omniroute/
|
||||
├── open-sse/ ← Core proxy library (portable, framework-agnostic)
|
||||
│ ├── index.js ← Main entry point, exports everything
|
||||
│ ├── config/ ← Configuration & constants
|
||||
│ ├── executors/ ← Provider-specific request execution
|
||||
│ ├── handlers/ ← Request handling orchestration
|
||||
│ ├── services/ ← Business logic (auth, models, fallback, usage)
|
||||
│ ├── translator/ ← Format translation engine
|
||||
│ │ ├── request/ ← Request translators (8 files)
|
||||
│ │ ├── response/ ← Response translators (7 files)
|
||||
│ │ └── helpers/ ← Shared translation utilities (6 files)
|
||||
│ └── utils/ ← Utility functions
|
||||
├── src/ ← Application layer (Express/Worker runtime)
|
||||
│ ├── app/ ← Web UI, API routes, middleware
|
||||
│ ├── lib/ ← Database, auth, and shared library code
|
||||
│ ├── mitm/ ← Man-in-the-middle proxy utilities
|
||||
│ ├── models/ ← Database models
|
||||
│ ├── shared/ ← Shared utilities (wrappers around open-sse)
|
||||
│ ├── sse/ ← SSE endpoint handlers
|
||||
│ └── store/ ← State management
|
||||
├── data/ ← Runtime data (credentials, logs)
|
||||
│ └── provider-credentials.json (external credentials override, gitignored)
|
||||
└── tester/ ← Test utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Module-by-Module Breakdown
|
||||
|
||||
### 4.1 Config (`open-sse/config/`)
|
||||
|
||||
The **single source of truth** for all provider configuration.
|
||||
|
||||
| File | Purpose |
|
||||
| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `constants.ts` | `PROVIDERS` object with base URLs, OAuth credentials (defaults), headers, and default system prompts for every provider. Also defines `HTTP_STATUS`, `ERROR_TYPES`, `COOLDOWN_MS`, `BACKOFF_CONFIG`, and `SKIP_PATTERNS`. |
|
||||
| `credentialLoader.ts` | Loads external credentials from `data/provider-credentials.json` and merges them over the hardcoded defaults in `PROVIDERS`. Keeps secrets out of source control while maintaining backwards compatibility. |
|
||||
| `providerModels.ts` | Central model registry: maps provider aliases → model IDs. Functions like `getModels()`, `getProviderByAlias()`. |
|
||||
| `codexInstructions.ts` | System instructions injected into Codex requests (editing constraints, sandbox rules, approval policies). |
|
||||
| `defaultThinkingSignature.ts` | Default "thinking" signatures for Claude and Gemini models. |
|
||||
| `ollamaModels.ts` | Schema definition for local Ollama models (name, size, family, quantization). |
|
||||
|
||||
#### Credential Loading Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["App starts"] --> B["constants.ts defines PROVIDERS\nwith hardcoded defaults"]
|
||||
B --> C{"data/provider-credentials.json\nexists?"}
|
||||
C -->|Yes| D["credentialLoader reads JSON"]
|
||||
C -->|No| E["Use hardcoded defaults"]
|
||||
D --> F{"For each provider in JSON"}
|
||||
F --> G{"Provider exists\nin PROVIDERS?"}
|
||||
G -->|No| H["Log warning, skip"]
|
||||
G -->|Yes| I{"Value is object?"}
|
||||
I -->|No| J["Log warning, skip"]
|
||||
I -->|Yes| K["Merge clientId, clientSecret,\ntokenUrl, authUrl, refreshUrl"]
|
||||
K --> F
|
||||
H --> F
|
||||
J --> F
|
||||
F -->|Done| L["PROVIDERS ready with\nmerged credentials"]
|
||||
E --> L
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 Executors (`open-sse/executors/`)
|
||||
|
||||
Executors encapsulate **provider-specific logic** using the **Strategy Pattern**. Each executor overrides base methods as needed.
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BaseExecutor {
|
||||
+buildUrl(model, stream, options)
|
||||
+buildHeaders(credentials, stream, body)
|
||||
+transformRequest(body, model, stream, credentials)
|
||||
+execute(url, options)
|
||||
+shouldRetry(status, error)
|
||||
+refreshCredentials(credentials, log)
|
||||
}
|
||||
|
||||
class DefaultExecutor {
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class AntigravityExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+shouldRetry()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
class CursorExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseResponse()
|
||||
+generateChecksum()
|
||||
}
|
||||
|
||||
class KiroExecutor {
|
||||
+buildUrl()
|
||||
+buildHeaders()
|
||||
+transformRequest()
|
||||
+parseEventStream()
|
||||
+refreshCredentials()
|
||||
}
|
||||
|
||||
BaseExecutor <|-- DefaultExecutor
|
||||
BaseExecutor <|-- AntigravityExecutor
|
||||
BaseExecutor <|-- CursorExecutor
|
||||
BaseExecutor <|-- KiroExecutor
|
||||
BaseExecutor <|-- CodexExecutor
|
||||
BaseExecutor <|-- GeminiCLIExecutor
|
||||
BaseExecutor <|-- GithubExecutor
|
||||
```
|
||||
|
||||
| Executor | Provider | Key Specializations |
|
||||
| ---------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base.ts` | — | Abstract base: URL building, headers, retry logic, credential refresh |
|
||||
| `default.ts` | Claude, Gemini, OpenAI, GLM, Kimi, MiniMax | Generic OAuth token refresh for standard providers |
|
||||
| `antigravity.ts` | Google Cloud Code | Project/session ID generation, multi-URL fallback, custom retry parsing from error messages ("reset after 2h7m23s") |
|
||||
| `cursor.ts` | Cursor IDE | **Most complex**: SHA-256 checksum auth, Protobuf request encoding, binary EventStream → SSE response parsing |
|
||||
| `codex.ts` | OpenAI Codex | Injects system instructions, manages thinking levels, removes unsupported parameters |
|
||||
| `gemini-cli.ts` | Google Gemini CLI | Custom URL building (`streamGenerateContent`), Google OAuth token refresh |
|
||||
| `github.ts` | GitHub Copilot | Dual token system (GitHub OAuth + Copilot token), VSCode header mimicking |
|
||||
| `kiro.ts` | AWS CodeWhisperer | AWS EventStream binary parsing, AMZN event frames, token estimation |
|
||||
| `index.ts` | — | Factory: maps provider name → executor class, with default fallback |
|
||||
|
||||
---
|
||||
|
||||
### 4.3 Handlers (`open-sse/handlers/`)
|
||||
|
||||
The **orchestration layer** — coordinates translation, execution, streaming, and error handling.
|
||||
|
||||
| File | Purpose |
|
||||
| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `chatCore.ts` | **Central orchestrator** (~600 lines). Handles the complete request lifecycle: format detection → translation → executor dispatch → streaming/non-streaming response → token refresh → error handling → usage logging. |
|
||||
| `responsesHandler.ts` | Adapter for OpenAI's Responses API: converts Responses format → Chat Completions → sends to `chatCore` → converts SSE back to Responses format. |
|
||||
| `embeddings.ts` | Embedding generation handler: resolves embedding model → provider, dispatches to provider API, returns OpenAI-compatible embedding response. Supports 6+ providers. |
|
||||
| `imageGeneration.ts` | Image generation handler: resolves image model → provider, supports OpenAI-compatible, Gemini-image (Antigravity), and fallback (Nebius) modes. Returns base64 or URL images. |
|
||||
|
||||
#### Request Lifecycle (chatCore.ts)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant chatCore
|
||||
participant Translator
|
||||
participant Executor
|
||||
participant Provider
|
||||
|
||||
Client->>chatCore: Request (any format)
|
||||
chatCore->>chatCore: Detect source format
|
||||
chatCore->>chatCore: Check bypass patterns
|
||||
chatCore->>chatCore: Resolve model & provider
|
||||
chatCore->>Translator: Translate request (source → OpenAI → target)
|
||||
chatCore->>Executor: Get executor for provider
|
||||
Executor->>Executor: Build URL, headers, transform request
|
||||
Executor->>Executor: Refresh credentials if needed
|
||||
Executor->>Provider: HTTP fetch (streaming or non-streaming)
|
||||
|
||||
alt Streaming
|
||||
Provider-->>chatCore: SSE stream
|
||||
chatCore->>chatCore: Pipe through SSE transform stream
|
||||
Note over chatCore: Transform stream translates<br/>each chunk: target → OpenAI → source
|
||||
chatCore-->>Client: Translated SSE stream
|
||||
else Non-streaming
|
||||
Provider-->>chatCore: JSON response
|
||||
chatCore->>Translator: Translate response
|
||||
chatCore-->>Client: Translated JSON
|
||||
end
|
||||
|
||||
alt Error (401, 429, 500...)
|
||||
chatCore->>Executor: Retry with credential refresh
|
||||
chatCore->>chatCore: Account fallback logic
|
||||
end
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.4 Services (`open-sse/services/`)
|
||||
|
||||
Business logic that supports the handlers and executors.
|
||||
|
||||
| File | Purpose |
|
||||
| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `provider.ts` | **Format detection** (`detectFormat`): analyzes request body structure to identify Claude/OpenAI/Gemini/Antigravity/Responses formats (includes `max_tokens` heuristic for Claude). Also: URL building, header building, thinking config normalization. Supports `openai-compatible-*` and `anthropic-compatible-*` dynamic providers. |
|
||||
| `model.ts` | Model string parsing (`claude/model-name` → `{provider: "claude", model: "model-name"}`), alias resolution with collision detection, input sanitization (rejects path traversal/control chars), and model info resolution with async alias getter support. |
|
||||
| `accountFallback.ts` | Rate-limit handling: exponential backoff (1s → 2s → 4s → max 2min), account cooldown management, error classification (which errors trigger fallback vs. not). |
|
||||
| `tokenRefresh.ts` | OAuth token refresh for **every provider**: Google (Gemini, Antigravity), Claude, Codex, Qwen, iFlow, GitHub (OAuth + Copilot dual-token), Kiro (AWS SSO OIDC + Social Auth). Includes in-flight promise deduplication cache and retry with exponential backoff. |
|
||||
| `combo.ts` | **Combo models**: chains of fallback models. If model A fails with a fallback-eligible error, try model B, then C, etc. Returns actual upstream status codes. |
|
||||
| `usage.ts` | Fetches quota/usage data from provider APIs (GitHub Copilot quotas, Antigravity model quotas, Codex rate limits, Kiro usage breakdowns, Claude settings). |
|
||||
| `accountSelector.ts` | Smart account selection with scoring algorithm: considers priority, health status, round-robin position, and cooldown state to pick the optimal account for each request. |
|
||||
| `contextManager.ts` | Request context lifecycle management: creates and tracks per-request context objects with metadata (request ID, timestamps, provider info) for debugging and logging. |
|
||||
| `ipFilter.ts` | IP-based access control: supports allowlist and blocklist modes. Validates client IP against configured rules before processing API requests. |
|
||||
| `sessionManager.ts` | Session tracking with client fingerprinting: tracks active sessions using hashed client identifiers, monitors request counts, and provides session metrics. |
|
||||
| `signatureCache.ts` | Request signature-based deduplication cache: prevents duplicate requests by caching recent request signatures and returning cached responses for identical requests within a time window. |
|
||||
| `systemPrompt.ts` | Global system prompt injection: prepends or appends a configurable system prompt to all requests, with per-provider compatibility handling. |
|
||||
| `thinkingBudget.ts` | Reasoning token budget management: supports passthrough, auto (strip thinking config), custom (fixed budget), and adaptive (complexity-scaled) modes for controlling thinking/reasoning tokens. |
|
||||
| `wildcardRouter.ts` | Wildcard model pattern routing: resolves wildcard patterns (e.g., `*/claude-*`) to concrete provider/model pairs based on availability and priority. |
|
||||
|
||||
#### Token Refresh Deduplication
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant R1 as Request 1
|
||||
participant R2 as Request 2
|
||||
participant Cache as refreshPromiseCache
|
||||
participant OAuth as OAuth Provider
|
||||
|
||||
R1->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: No in-flight promise
|
||||
Cache->>OAuth: Start refresh
|
||||
R2->>Cache: getAccessToken("gemini", token)
|
||||
Cache->>Cache: Found in-flight promise
|
||||
Cache-->>R2: Return existing promise
|
||||
OAuth-->>Cache: New access token
|
||||
Cache-->>R1: New access token
|
||||
Cache-->>R2: Same access token (shared)
|
||||
Cache->>Cache: Delete cache entry
|
||||
```
|
||||
|
||||
#### Account Fallback State Machine
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Active
|
||||
Active --> Error: Request fails (401/429/500)
|
||||
Error --> Cooldown: Apply backoff
|
||||
Cooldown --> Active: Cooldown expires
|
||||
Active --> Active: Request succeeds (reset backoff)
|
||||
|
||||
state Error {
|
||||
[*] --> ClassifyError
|
||||
ClassifyError --> ShouldFallback: Rate limit / Auth / Transient
|
||||
ClassifyError --> NoFallback: 400 Bad Request
|
||||
}
|
||||
|
||||
state Cooldown {
|
||||
[*] --> ExponentialBackoff
|
||||
ExponentialBackoff: Level 0 = 1s
|
||||
ExponentialBackoff: Level 1 = 2s
|
||||
ExponentialBackoff: Level 2 = 4s
|
||||
ExponentialBackoff: Max = 2min
|
||||
}
|
||||
```
|
||||
|
||||
#### Combo Model Chain
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Request with\ncombo model"] --> B["Model A"]
|
||||
B -->|"2xx Success"| C["Return response"]
|
||||
B -->|"429/401/500"| D{"Fallback\neligible?"}
|
||||
D -->|Yes| E["Model B"]
|
||||
D -->|No| F["Return error"]
|
||||
E -->|"2xx Success"| C
|
||||
E -->|"429/401/500"| G{"Fallback\neligible?"}
|
||||
G -->|Yes| H["Model C"]
|
||||
G -->|No| F
|
||||
H -->|"2xx Success"| C
|
||||
H -->|"Fail"| I["All failed →\nReturn last status"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.5 Translator (`open-sse/translator/`)
|
||||
|
||||
The **format translation engine** using a self-registering plugin system.
|
||||
|
||||
#### Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "Request Translation"
|
||||
A["Claude → OpenAI"]
|
||||
B["Gemini → OpenAI"]
|
||||
C["Antigravity → OpenAI"]
|
||||
D["OpenAI Responses → OpenAI"]
|
||||
E["OpenAI → Claude"]
|
||||
F["OpenAI → Gemini"]
|
||||
G["OpenAI → Kiro"]
|
||||
H["OpenAI → Cursor"]
|
||||
end
|
||||
|
||||
subgraph "Response Translation"
|
||||
I["Claude → OpenAI"]
|
||||
J["Gemini → OpenAI"]
|
||||
K["Kiro → OpenAI"]
|
||||
L["Cursor → OpenAI"]
|
||||
M["OpenAI → Claude"]
|
||||
N["OpenAI → Antigravity"]
|
||||
O["OpenAI → Responses"]
|
||||
end
|
||||
```
|
||||
|
||||
| Directory | Files | Description |
|
||||
| ------------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `request/` | 8 translators | Convert request bodies between formats. Each file self-registers via `register(from, to, fn)` on import. |
|
||||
| `response/` | 7 translators | Convert streaming response chunks between formats. Handles SSE event types, thinking blocks, tool calls. |
|
||||
| `helpers/` | 6 helpers | Shared utilities: `claudeHelper` (system prompt extraction, thinking config), `geminiHelper` (parts/contents mapping), `openaiHelper` (format filtering), `toolCallHelper` (ID generation, missing response injection), `maxTokensHelper`, `responsesApiHelper`. |
|
||||
| `index.ts` | — | Translation engine: `translateRequest()`, `translateResponse()`, state management, registry. |
|
||||
| `formats.ts` | — | Format constants: `OPENAI`, `CLAUDE`, `GEMINI`, `ANTIGRAVITY`, `KIRO`, `CURSOR`, `OPENAI_RESPONSES`. |
|
||||
|
||||
#### Key Design: Self-Registering Plugins
|
||||
|
||||
```javascript
|
||||
// Each translator file calls register() on import:
|
||||
import { register } from "../index.js";
|
||||
register("claude", "openai", translateClaudeToOpenAI);
|
||||
|
||||
// The index.js imports all translator files, triggering registration:
|
||||
import "./request/claude-to-openai.js"; // ← self-registers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.6 Utils (`open-sse/utils/`)
|
||||
|
||||
| File | Purpose |
|
||||
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `error.ts` | Error response building (OpenAI-compatible format), upstream error parsing, Antigravity retry-time extraction from error messages, SSE error streaming. |
|
||||
| `stream.ts` | **SSE Transform Stream** — the core streaming pipeline. Two modes: `TRANSLATE` (full format translation) and `PASSTHROUGH` (normalize + extract usage). Handles chunk buffering, usage estimation, content length tracking. Per-stream encoder/decoder instances avoid shared state. |
|
||||
| `streamHelpers.ts` | Low-level SSE utilities: `parseSSELine` (whitespace-tolerant), `hasValuableContent` (filters empty chunks for OpenAI/Claude/Gemini), `fixInvalidId`, `formatSSE` (format-aware SSE serialization with `perf_metrics` cleanup). |
|
||||
| `usageTracking.ts` | Token usage extraction from any format (Claude/OpenAI/Gemini/Responses), estimation with separate tool/message char-per-token ratios, buffer addition (2000 tokens safety margin), format-specific field filtering, console logging with ANSI colors. |
|
||||
| `requestLogger.ts` | File-based request logging (opt-in via `ENABLE_REQUEST_LOGS=true`). Creates session folders with numbered files: `1_req_client.json` → `7_res_client.txt`. All I/O is async (fire-and-forget). Masks sensitive headers. |
|
||||
| `bypassHandler.ts` | Intercepts specific patterns from Claude CLI (title extraction, warmup, count) and returns fake responses without calling any provider. Supports both streaming and non-streaming. Intentionally limited to Claude CLI scope. |
|
||||
| `networkProxy.ts` | Resolves outbound proxy URL for a given provider with precedence: provider-specific config → global config → environment variables (`HTTPS_PROXY`/`HTTP_PROXY`/`ALL_PROXY`). Supports `NO_PROXY` exclusions. Caches config for 30s. |
|
||||
|
||||
#### SSE Streaming Pipeline
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["Provider SSE stream"] --> B["TextDecoder\n(per-stream instance)"]
|
||||
B --> C["Buffer lines\n(split on newline)"]
|
||||
C --> D["parseSSELine()\n(trim whitespace, parse JSON)"]
|
||||
D --> E{"Mode?"}
|
||||
E -->|TRANSLATE| F["translateResponse()\ntarget → OpenAI → source"]
|
||||
E -->|PASSTHROUGH| G["fixInvalidId()\nnormalize chunk"]
|
||||
F --> H["hasValuableContent()\nfilter empty chunks"]
|
||||
G --> H
|
||||
H -->|"Has content"| I["extractUsage()\ntrack token counts"]
|
||||
H -->|"Empty"| J["Skip chunk"]
|
||||
I --> K["formatSSE()\nserialize + clean perf_metrics"]
|
||||
K --> L["TextEncoder\n(per-stream instance)"]
|
||||
L --> M["Enqueue to\nclient stream"]
|
||||
|
||||
style A fill:#f9f,stroke:#333
|
||||
style M fill:#9f9,stroke:#333
|
||||
```
|
||||
|
||||
#### Request Logger Session Structure
|
||||
|
||||
```
|
||||
logs/
|
||||
└── claude_gemini_claude-sonnet_20260208_143045/
|
||||
├── 1_req_client.json ← Raw client request
|
||||
├── 2_req_source.json ← After initial conversion
|
||||
├── 3_req_openai.json ← OpenAI intermediate format
|
||||
├── 4_req_target.json ← Final target format
|
||||
├── 5_res_provider.txt ← Provider SSE chunks (streaming)
|
||||
├── 5_res_provider.json ← Provider response (non-streaming)
|
||||
├── 6_res_openai.txt ← OpenAI intermediate chunks
|
||||
├── 7_res_client.txt ← Client-facing SSE chunks
|
||||
└── 6_error.json ← Error details (if any)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.7 Application Layer (`src/`)
|
||||
|
||||
| Directory | Purpose |
|
||||
| ------------- | ---------------------------------------------------------------------- |
|
||||
| `src/app/` | Web UI, API routes, Express middleware, OAuth callback handlers |
|
||||
| `src/lib/` | Database access (`localDb.ts`, `usageDb.ts`), authentication, shared |
|
||||
| `src/mitm/` | Man-in-the-middle proxy utilities for intercepting provider traffic |
|
||||
| `src/models/` | Database model definitions |
|
||||
| `src/shared/` | Wrappers around open-sse functions (provider, stream, error, etc.) |
|
||||
| `src/sse/` | SSE endpoint handlers that wire the open-sse library to Express routes |
|
||||
| `src/store/` | Application state management |
|
||||
|
||||
#### Notable API Routes
|
||||
|
||||
| Route | Methods | Purpose |
|
||||
| --------------------------------------------- | --------------- | ------------------------------------------------------------------------------------- |
|
||||
| `/api/provider-models` | GET/POST/DELETE | CRUD for custom models per provider |
|
||||
| `/api/models/catalog` | GET | Aggregated catalog of all models (chat, embedding, image, custom) grouped by provider |
|
||||
| `/api/settings/proxy` | GET/PUT/DELETE | Hierarchical outbound proxy configuration (`global/providers/combos/keys`) |
|
||||
| `/api/settings/proxy/test` | POST | Validates proxy connectivity and returns public IP/latency |
|
||||
| `/v1/providers/[provider]/chat/completions` | POST | Dedicated per-provider chat completions with model validation |
|
||||
| `/v1/providers/[provider]/embeddings` | POST | Dedicated per-provider embeddings with model validation |
|
||||
| `/v1/providers/[provider]/images/generations` | POST | Dedicated per-provider image generation with model validation |
|
||||
| `/api/settings/ip-filter` | GET/PUT | IP allowlist/blocklist management |
|
||||
| `/api/settings/thinking-budget` | GET/PUT | Reasoning token budget configuration (passthrough/auto/custom/adaptive) |
|
||||
| `/api/settings/system-prompt` | GET/PUT | Global system prompt injection for all requests |
|
||||
| `/api/sessions` | GET | Active session tracking and metrics |
|
||||
| `/api/rate-limits` | GET | Per-account rate limit status |
|
||||
|
||||
---
|
||||
|
||||
## 5. Key Design Patterns
|
||||
|
||||
### 5.1 Hub-and-Spoke Translation
|
||||
|
||||
All formats translate through **OpenAI format as the hub**. Adding a new provider only requires writing **one pair** of translators (to/from OpenAI), not N pairs.
|
||||
|
||||
### 5.2 Executor Strategy Pattern
|
||||
|
||||
Each provider has a dedicated executor class inheriting from `BaseExecutor`. The factory in `executors/index.ts` selects the right one at runtime.
|
||||
|
||||
### 5.3 Self-Registering Plugin System
|
||||
|
||||
Translator modules register themselves on import via `register()`. Adding a new translator is just creating a file and importing it.
|
||||
|
||||
### 5.4 Account Fallback with Exponential Backoff
|
||||
|
||||
When a provider returns 429/401/500, the system can switch to the next account, applying exponential cooldowns (1s → 2s → 4s → max 2min).
|
||||
|
||||
### 5.5 Combo Model Chains
|
||||
|
||||
A "combo" groups multiple `provider/model` strings. If the first fails, fallback to the next automatically.
|
||||
|
||||
### 5.6 Stateful Streaming Translation
|
||||
|
||||
Response translation maintains state across SSE chunks (thinking block tracking, tool call accumulation, content block indexing) via the `initState()` mechanism.
|
||||
|
||||
### 5.7 Usage Safety Buffer
|
||||
|
||||
A 2000-token buffer is added to reported usage to prevent clients from hitting context window limits due to overhead from system prompts and format translation.
|
||||
|
||||
---
|
||||
|
||||
## 6. Supported Formats
|
||||
|
||||
| Format | Direction | Identifier |
|
||||
| ----------------------- | --------------- | ------------------ |
|
||||
| OpenAI Chat Completions | source + target | `openai` |
|
||||
| OpenAI Responses API | source + target | `openai-responses` |
|
||||
| Anthropic Claude | source + target | `claude` |
|
||||
| Google Gemini | source + target | `gemini` |
|
||||
| Google Gemini CLI | target only | `gemini-cli` |
|
||||
| Antigravity | source + target | `antigravity` |
|
||||
| AWS Kiro | target only | `kiro` |
|
||||
| Cursor | target only | `cursor` |
|
||||
|
||||
---
|
||||
|
||||
## 7. Supported Providers
|
||||
|
||||
| Provider | Auth Method | Executor | Key Notes |
|
||||
| ------------------------ | ---------------------- | ----------- | --------------------------------------------- |
|
||||
| Anthropic Claude | API key or OAuth | Default | Uses `x-api-key` header |
|
||||
| Google Gemini | API key or OAuth | Default | Uses `x-goog-api-key` header |
|
||||
| Google Gemini CLI | OAuth | GeminiCLI | Uses `streamGenerateContent` endpoint |
|
||||
| Antigravity | OAuth | Antigravity | Multi-URL fallback, custom retry parsing |
|
||||
| OpenAI | API key | Default | Standard Bearer auth |
|
||||
| Codex | OAuth | Codex | Injects system instructions, manages thinking |
|
||||
| GitHub Copilot | OAuth + Copilot token | Github | Dual token, VSCode header mimicking |
|
||||
| Kiro (AWS) | AWS SSO OIDC or Social | Kiro | Binary EventStream parsing |
|
||||
| Cursor IDE | Checksum auth | Cursor | Protobuf encoding, SHA-256 checksums |
|
||||
| Qwen | OAuth | Default | Standard auth |
|
||||
| iFlow | OAuth (Basic + Bearer) | Default | Dual auth header |
|
||||
| OpenRouter | API key | Default | Standard Bearer auth |
|
||||
| GLM, Kimi, MiniMax | API key | Default | Claude-compatible, use `x-api-key` |
|
||||
| `openai-compatible-*` | API key | Default | Dynamic: any OpenAI-compatible endpoint |
|
||||
| `anthropic-compatible-*` | API key | Default | Dynamic: any Claude-compatible endpoint |
|
||||
|
||||
---
|
||||
|
||||
## 8. Data Flow Summary
|
||||
|
||||
### Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor\nbuildUrl + buildHeaders"]
|
||||
D --> E["fetch(providerURL)"]
|
||||
E --> F["createSSEStream()\nTRANSLATE mode"]
|
||||
F --> G["parseSSELine()"]
|
||||
G --> H["translateResponse()\ntarget → OpenAI → source"]
|
||||
H --> I["extractUsage()\n+ addBuffer"]
|
||||
I --> J["formatSSE()"]
|
||||
J --> K["Client receives\ntranslated SSE"]
|
||||
K --> L["logUsage()\nsaveRequestUsage()"]
|
||||
```
|
||||
|
||||
### Non-Streaming Request
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Client"] --> B["detectFormat()"]
|
||||
B --> C["translateRequest()\nsource → OpenAI → target"]
|
||||
C --> D["Executor.execute()"]
|
||||
D --> E["translateResponse()\ntarget → OpenAI → source"]
|
||||
E --> F["Return JSON\nresponse"]
|
||||
```
|
||||
|
||||
### Bypass Flow (Claude CLI)
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Claude CLI request"] --> B{"Match bypass\npattern?"}
|
||||
B -->|"Title/Warmup/Count"| C["Generate fake\nOpenAI response"]
|
||||
B -->|"No match"| D["Normal flow"]
|
||||
C --> E["Translate to\nsource format"]
|
||||
E --> F["Return without\ncalling provider"]
|
||||
```
|
||||
@@ -0,0 +1,148 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/FEATURES.md) · 🇪🇸 [es](../es/FEATURES.md) · 🇫🇷 [fr](../fr/FEATURES.md) · 🇩🇪 [de](../de/FEATURES.md) · 🇮🇹 [it](../it/FEATURES.md) · 🇷🇺 [ru](../ru/FEATURES.md) · 🇨🇳 [zh-CN](../zh-CN/FEATURES.md) · 🇯🇵 [ja](../ja/FEATURES.md) · 🇰🇷 [ko](../ko/FEATURES.md) · 🇸🇦 [ar](../ar/FEATURES.md) · 🇮🇳 [in](../in/FEATURES.md) · 🇹🇭 [th](../th/FEATURES.md) · 🇻🇳 [vi](../vi/FEATURES.md) · 🇮🇩 [id](../id/FEATURES.md) · 🇲🇾 [ms](../ms/FEATURES.md) · 🇳🇱 [nl](../nl/FEATURES.md) · 🇵🇱 [pl](../pl/FEATURES.md) · 🇸🇪 [sv](../sv/FEATURES.md) · 🇳🇴 [no](../no/FEATURES.md) · 🇩🇰 [da](../da/FEATURES.md) · 🇫🇮 [fi](../fi/FEATURES.md) · 🇵🇹 [pt](../pt/FEATURES.md) · 🇷🇴 [ro](../ro/FEATURES.md) · 🇭🇺 [hu](../hu/FEATURES.md) · 🇧🇬 [bg](../bg/FEATURES.md) · 🇸🇰 [sk](../sk/FEATURES.md) · 🇺🇦 [uk-UA](../uk-UA/FEATURES.md) · 🇮🇱 [he](../he/FEATURES.md) · 🇵🇭 [phi](../phi/FEATURES.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute — Dashboard Features Gallery
|
||||
|
||||
🌐 **Languages:** 🇺🇸 [English](FEATURES.md) | 🇧🇷 [Português (Brasil)](i18n/pt-BR/FEATURES.md) | 🇪🇸 [Español](i18n/es/FEATURES.md) | 🇫🇷 [Français](i18n/fr/FEATURES.md) | 🇮🇹 [Italiano](i18n/it/FEATURES.md) | 🇷🇺 [Русский](i18n/ru/FEATURES.md) | 🇨🇳 [中文 (简体)](i18n/zh-CN/FEATURES.md) | 🇩🇪 [Deutsch](i18n/de/FEATURES.md) | 🇮🇳 [हिन्दी](i18n/in/FEATURES.md) | 🇹🇭 [ไทย](i18n/th/FEATURES.md) | 🇺🇦 [Українська](i18n/uk-UA/FEATURES.md) | 🇸🇦 [العربية](i18n/ar/FEATURES.md) | 🇯🇵 [日本語](i18n/ja/FEATURES.md) | 🇻🇳 [Tiếng Việt](i18n/vi/FEATURES.md) | 🇧🇬 [Български](i18n/bg/FEATURES.md) | 🇩🇰 [Dansk](i18n/da/FEATURES.md) | 🇫🇮 [Suomi](i18n/fi/FEATURES.md) | 🇮🇱 [עברית](i18n/he/FEATURES.md) | 🇭🇺 [Magyar](i18n/hu/FEATURES.md) | 🇮🇩 [Bahasa Indonesia](i18n/id/FEATURES.md) | 🇰🇷 [한국어](i18n/ko/FEATURES.md) | 🇲🇾 [Bahasa Melayu](i18n/ms/FEATURES.md) | 🇳🇱 [Nederlands](i18n/nl/FEATURES.md) | 🇳🇴 [Norsk](i18n/no/FEATURES.md) | 🇵🇹 [Português (Portugal)](i18n/pt/FEATURES.md) | 🇷🇴 [Română](i18n/ro/FEATURES.md) | 🇵🇱 [Polski](i18n/pl/FEATURES.md) | 🇸🇰 [Slovenčina](i18n/sk/FEATURES.md) | 🇸🇪 [Svenska](i18n/sv/FEATURES.md) | 🇵🇭 [Filipino](i18n/phi/FEATURES.md)
|
||||
|
||||
Visual guide to every section of the OmniRoute dashboard.
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Providers
|
||||
|
||||
Manage AI provider connections: OAuth providers (Claude Code, Codex, Gemini CLI), API key providers (Groq, DeepSeek, OpenRouter), and free providers (iFlow, Qwen, Kiro).
|
||||
|
||||
- **Ollama Cloud** — Cloud-hosted Ollama models at `api.ollama.com` (free "Light usage" tier); use `ollamacloud/<model>` prefix
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎨 Combos
|
||||
|
||||
Create model routing combos with 6 strategies: priority, weighted, round-robin, random, least-used, and cost-optimized. Each combo chains multiple models with automatic fallback and includes quick templates and readiness checks.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 📊 Analytics
|
||||
|
||||
Comprehensive usage analytics with token consumption, cost estimates, activity heatmaps, weekly distribution charts, and per-provider breakdowns.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🏥 System Health
|
||||
|
||||
Real-time monitoring: uptime, memory, version, latency percentiles (p50/p95/p99), cache statistics, and provider circuit breaker states.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 Translator Playground
|
||||
|
||||
Four modes for debugging API translations: **Playground** (format converter), **Chat Tester** (live requests), **Test Bench** (batch tests), and **Live Monitor** (real-time stream).
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🎮 Model Playground _(v2.0.9+)_
|
||||
|
||||
Test any model directly from the dashboard. Select provider, model, and endpoint, write prompts with Monaco Editor, stream responses in real-time, abort mid-stream, and view timing metrics.
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Themes _(v2.0.5+)_
|
||||
|
||||
Customizable color themes for the entire dashboard. Choose from 7 preset colors (Coral, Blue, Red, Green, Violet, Orange, Cyan) or create a custom theme by picking any hex color. Supports light, dark, and system mode.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Settings
|
||||
|
||||
Comprehensive settings panel with tabs:
|
||||
|
||||
- **General** — System storage, backup management (export/import database)
|
||||
- **Appearance** — Theme selector (dark/light/system), color theme presets and custom colors, health log visibility
|
||||
- **Security** — API endpoint protection, custom provider blocking, IP filtering, session info
|
||||
- **Routing** — Model aliases, background task degradation
|
||||
- **Resilience** — Rate limit persistence, circuit breaker tuning
|
||||
- **Advanced** — Configuration overrides
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔧 CLI Tools
|
||||
|
||||
One-click configuration for AI coding tools: Claude Code, Codex CLI, Gemini CLI, OpenClaw, Kilo Code, Antigravity, Cline, Continue, Cursor, and Factory Droid. Features automated config apply/reset, connection profiles, and model mapping.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🤖 CLI Agents _(v2.0.11+)_
|
||||
|
||||
Dashboard for discovering and managing CLI agents. Shows a grid of 14 built-in agents (Codex, Claude, Goose, Gemini CLI, OpenClaw, Aider, OpenCode, Cline, Qwen Code, ForgeCode, Amazon Q, Open Interpreter, Cursor CLI, Warp) with:
|
||||
|
||||
- **Installation status** — Installed / Not Found with version detection
|
||||
- **Protocol badges** — stdio, HTTP, etc.
|
||||
- **Custom agents** — Register any CLI tool via form (name, binary, version command, spawn args)
|
||||
- **CLI Fingerprint Matching** — Per-provider toggle to match native CLI request signatures, reducing ban risk while preserving proxy IP
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Media _(v2.0.3+)_
|
||||
|
||||
Generate images, videos, and music from the dashboard. Supports OpenAI, xAI, Together, Hyperbolic, SD WebUI, ComfyUI, AnimateDiff, Stable Audio Open, and MusicGen.
|
||||
|
||||
---
|
||||
|
||||
## 📝 Request Logs
|
||||
|
||||
Real-time request logging with filtering by provider, model, account, and API key. Shows status codes, token usage, latency, and response details.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🌐 API Endpoint
|
||||
|
||||
Your unified API endpoint with capability breakdown: Chat Completions, Responses API, Embeddings, Image Generation, Reranking, Audio Transcription, Text-to-Speech, Moderations, and registered API keys. Cloud proxy support for remote access.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 🔑 API Key Management
|
||||
|
||||
Create, scope, and revoke API keys. Each key can be restricted to specific models/providers with full access or read-only permissions. Visual key management with usage tracking.
|
||||
|
||||
---
|
||||
|
||||
## 📋 Audit Log
|
||||
|
||||
Administrative action tracking with filtering by action type, actor, target, IP address, and timestamp. Full security event history.
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ Desktop Application
|
||||
|
||||
Native Electron desktop app for Windows, macOS, and Linux. Run OmniRoute as a standalone application with system tray integration, offline support, auto-update, and one-click install.
|
||||
|
||||
Key features:
|
||||
|
||||
- Server readiness polling (no blank screen on cold start)
|
||||
- System tray with port management
|
||||
- Content Security Policy
|
||||
- Single-instance lock
|
||||
- Auto-update on restart
|
||||
- Platform-conditional UI (macOS traffic lights, Windows/Linux default titlebar)
|
||||
|
||||
📖 See [`electron/README.md`](../electron/README.md) for full documentation.
|
||||
@@ -0,0 +1,87 @@
|
||||
🌐 **Languages:** 🇺🇸 [English](../../README.md) · 🇧🇷 [pt-BR](../pt-BR/MCP-SERVER.md) · 🇪🇸 [es](../es/MCP-SERVER.md) · 🇫🇷 [fr](../fr/MCP-SERVER.md) · 🇩🇪 [de](../de/MCP-SERVER.md) · 🇮🇹 [it](../it/MCP-SERVER.md) · 🇷🇺 [ru](../ru/MCP-SERVER.md) · 🇨🇳 [zh-CN](../zh-CN/MCP-SERVER.md) · 🇯🇵 [ja](../ja/MCP-SERVER.md) · 🇰🇷 [ko](../ko/MCP-SERVER.md) · 🇸🇦 [ar](../ar/MCP-SERVER.md) · 🇮🇳 [in](../in/MCP-SERVER.md) · 🇹🇭 [th](../th/MCP-SERVER.md) · 🇻🇳 [vi](../vi/MCP-SERVER.md) · 🇮🇩 [id](../id/MCP-SERVER.md) · 🇲🇾 [ms](../ms/MCP-SERVER.md) · 🇳🇱 [nl](../nl/MCP-SERVER.md) · 🇵🇱 [pl](../pl/MCP-SERVER.md) · 🇸🇪 [sv](../sv/MCP-SERVER.md) · 🇳🇴 [no](../no/MCP-SERVER.md) · 🇩🇰 [da](../da/MCP-SERVER.md) · 🇫🇮 [fi](../fi/MCP-SERVER.md) · 🇵🇹 [pt](../pt/MCP-SERVER.md) · 🇷🇴 [ro](../ro/MCP-SERVER.md) · 🇭🇺 [hu](../hu/MCP-SERVER.md) · 🇧🇬 [bg](../bg/MCP-SERVER.md) · 🇸🇰 [sk](../sk/MCP-SERVER.md) · 🇺🇦 [uk-UA](../uk-UA/MCP-SERVER.md) · 🇮🇱 [he](../he/MCP-SERVER.md) · 🇵🇭 [phi](../phi/MCP-SERVER.md)
|
||||
|
||||
---
|
||||
|
||||
# OmniRoute MCP Server Documentation
|
||||
|
||||
> Model Context Protocol server with 16 intelligent tools
|
||||
|
||||
## Installation
|
||||
|
||||
OmniRoute MCP is built-in. Start it with:
|
||||
|
||||
```bash
|
||||
omniroute --mcp
|
||||
```
|
||||
|
||||
Or via the open-sse transport:
|
||||
|
||||
```bash
|
||||
# HTTP streamable transport (port 20130)
|
||||
omniroute --dev # MCP auto-starts on /mcp endpoint
|
||||
```
|
||||
|
||||
## IDE Configuration
|
||||
|
||||
See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot, and Claude Desktop setup.
|
||||
|
||||
---
|
||||
|
||||
## Essential Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :------------------------------ | :--------------------------------------- |
|
||||
| `omniroute_get_health` | Gateway health, circuit breakers, uptime |
|
||||
| `omniroute_list_combos` | All configured combos with models |
|
||||
| `omniroute_get_combo_metrics` | Performance metrics for a specific combo |
|
||||
| `omniroute_switch_combo` | Switch active combo by ID/name |
|
||||
| `omniroute_check_quota` | Quota status per provider or all |
|
||||
| `omniroute_route_request` | Send a chat completion through OmniRoute |
|
||||
| `omniroute_cost_report` | Cost analytics for a time period |
|
||||
| `omniroute_list_models_catalog` | Full model catalog with capabilities |
|
||||
|
||||
## Advanced Tools (8)
|
||||
|
||||
| Tool | Description |
|
||||
| :--------------------------------- | :---------------------------------------------- |
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
| `omniroute_get_session_snapshot` | Full session state: costs, tokens, errors |
|
||||
|
||||
## Authentication
|
||||
|
||||
MCP tools are authenticated via API key scopes. Each tool requires specific scopes:
|
||||
|
||||
| Scope | Tools |
|
||||
| :------------- | :----------------------------------------------- |
|
||||
| `read:health` | get_health, get_provider_metrics |
|
||||
| `read:combos` | list_combos, get_combo_metrics |
|
||||
| `write:combos` | switch_combo |
|
||||
| `read:quota` | check_quota |
|
||||
| `write:route` | route_request, simulate_route, test_combo |
|
||||
| `read:usage` | cost_report, get_session_snapshot, explain_route |
|
||||
| `write:config` | set_budget_guard, set_resilience_profile |
|
||||
| `read:models` | list_models_catalog, best_combo_for_task |
|
||||
|
||||
## Audit Logging
|
||||
|
||||
Every tool call is logged to `mcp_tool_audit` with:
|
||||
|
||||
- Tool name, arguments, result
|
||||
- Duration (ms), success/failure
|
||||
- API key hash, timestamp
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| :------------------------------------------- | :------------------------------------------ |
|
||||
| `open-sse/mcp-server/server.ts` | MCP server creation + 16 tool registrations |
|
||||
| `open-sse/mcp-server/transport.ts` | Stdio + HTTP transport |
|
||||
| `open-sse/mcp-server/auth.ts` | API key + scope validation |
|
||||
| `open-sse/mcp-server/audit.ts` | Tool call audit logging |
|
||||
| `open-sse/mcp-server/tools/advancedTools.ts` | 8 advanced tool handlers |
|
||||
@@ -11,6 +11,18 @@ _Tu proxy de API universal — un endpoint, 36+ proveedores, cero tiempo de inac
|
||||
|
||||
---
|
||||
|
||||
### 🚀 New in v2.0.9+ — Playground, CLI Fingerprints & ACP
|
||||
|
||||
| Feature | What It Does |
|
||||
| ------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| 🎮 **Model Playground** | Dashboard page to test any model directly — provider/model/endpoint selectors, Monaco Editor, streaming, abort, timing |
|
||||
| 🔏 **CLI Fingerprint Matching** | Per-provider header/body ordering to match native CLI signatures — toggle per provider in Settings > Security. **Your proxy IP is preserved** |
|
||||
| 🤝 **ACP Support (Agent Client Protocol)** | CLI agent discovery (Codex, Claude, Goose, Gemini CLI, OpenClaw), process spawner, `/api/acp/agents` endpoint |
|
||||
| 🤖 **ACP Agents Dashboard** | Debug > Agents page — grid of 14 agents with install status, version, custom agent form for any CLI tool |
|
||||
| 🔧 **Custom Model `apiFormat` Routing** | Custom models with `apiFormat: "responses"` now correctly route to the Responses API translator |
|
||||
| 🏢 **Codex Workspace Isolation** | Multiple Codex workspaces per email — OAuth correctly separates connections by workspace ID |
|
||||
| 🔄 **Electron Auto-Update** | Desktop app checks for updates + auto-install on restart |
|
||||
|
||||
### 🤖 Proveedor de IA Gratuito para tus agentes de programación favoritos
|
||||
|
||||
_Conecta cualquier IDE o herramienta CLI con IA a través de OmniRoute — gateway de API gratuito para programación ilimitada._
|
||||
@@ -104,12 +116,41 @@ _Conecta cualquier IDE o herramienta CLI con IA a través de OmniRoute — gatew
|
||||
|
||||
[🌐 Website](https://omniroute.online) • [🚀 Inicio Rápido](#-inicio-rápido) • [💡 Características](#-características-principales) • [📖 Docs](#-documentación) • [💰 Precios](#-precios-resumidos)
|
||||
|
||||
🌐 **Disponible en:** [English](README.md) | [Português](README.pt-BR.md) | [Español](README.es.md) | [Русский](README.ru.md) | [中文](README.zh-CN.md) | [Deutsch](README.de.md) | [Français](README.fr.md) | [Italiano](README.it.md)
|
||||
🌐 **Disponible en:** 🇺🇸 [English](../../README.md) | 🇧🇷 [Português (Brasil)](../pt-BR/README.md) | 🇪🇸 [Español](../es/README.md) | 🇫🇷 [Français](../fr/README.md) | 🇮🇹 [Italiano](../it/README.md) | 🇷🇺 [Русский](../ru/README.md) | 🇨🇳 [中文 (简体)](../zh-CN/README.md) | 🇩🇪 [Deutsch](../de/README.md) | 🇮🇳 [हिन्दी](../in/README.md) | 🇹🇭 [ไทย](../th/README.md) | 🇺🇦 [Українська](../uk-UA/README.md) | 🇸🇦 [العربية](../ar/README.md) | 🇯🇵 [日本語](../ja/README.md) | 🇻🇳 [Tiếng Việt](../vi/README.md) | 🇧🇬 [Български](../bg/README.md) | 🇩🇰 [Dansk](../da/README.md) | 🇫🇮 [Suomi](../fi/README.md) | 🇮🇱 [עברית](../he/README.md) | 🇭🇺 [Magyar](../hu/README.md) | 🇮🇩 [Bahasa Indonesia](../id/README.md) | 🇰🇷 [한국어](../ko/README.md) | 🇲🇾 [Bahasa Melayu](../ms/README.md) | 🇳🇱 [Nederlands](../nl/README.md) | 🇳🇴 [Norsk](../no/README.md) | 🇵🇹 [Português (Portugal)](../pt/README.md) | 🇷🇴 [Română](../ro/README.md) | 🇵🇱 [Polski](../pl/README.md) | 🇸🇰 [Slovenčina](../sk/README.md) | 🇸🇪 [Svenska](../sv/README.md) | 🇵🇭 [Filipino](../phi/README.md)
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## 🖼️
|
||||
|
||||
<div align="center">
|
||||
<img src="./docs/screenshots/MainOmniRoute.png" alt="OmniRoute" width="800"/>
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## 📸
|
||||
|
||||
<details>
|
||||
<summary><b>...</b></summary>
|
||||
|
||||
| # | # |
|
||||
| ----- | ---------------------------------------- |
|
||||
| **1** |  |
|
||||
| **2** |  |
|
||||
| **3** |  |
|
||||
| **4** |  |
|
||||
| **5** |  |
|
||||
| **6** |  |
|
||||
| **7** |  |
|
||||
| **8** |  |
|
||||
| **9** |  |
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🤔 ¿Por qué OmniRoute?
|
||||
|
||||
**Deja de desperdiciar dinero y chocar con límites:**
|
||||
@@ -128,6 +169,18 @@ _Conecta cualquier IDE o herramienta CLI con IA a través de OmniRoute — gatew
|
||||
|
||||
---
|
||||
|
||||
## 📧 Soporte
|
||||
|
||||
> 💬 **¡Únete a la comunidad!** [Grupo WhatsApp](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t) — Obtén ayuda, comparte consejos y mantente al día.
|
||||
|
||||
- **Website**: [omniroute.online](https://omniroute.online)
|
||||
- **GitHub**: [github.com/diegosouzapw/OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
||||
- **Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **WhatsApp**: [Grupo de la Comunidad](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
- **Proyecto Original**: [9router por decolua](https://github.com/decolua/9router)
|
||||
|
||||
---
|
||||
|
||||
## 🔄 Cómo Funciona
|
||||
|
||||
```
|
||||
@@ -157,6 +210,498 @@ Resultado: Nunca dejes de programar, costo mínimo
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Lo que resuelve OmniRoute: 30 puntos débiles reales y casos de uso
|
||||
|
||||
> **Todos los desarrolladores que utilizan herramientas de IA se enfrentan a estos problemas a diario.** OmniRoute se creó para resolverlos todos: desde sobrecostos hasta bloqueos regionales, desde flujos rotos de OAuth hasta operaciones de protocolo y observabilidad empresarial.
|
||||
|
||||
<details>
|
||||
<summary><b>💸 1. "Pago una suscripción costosa pero aún así me interrumpen los límites"</b></summary>
|
||||
|
||||
Los desarrolladores pagan entre 20 y 200 dólares al mes por Claude Pro, Codex Pro o GitHub Copilot. Incluso pagando, la cuota tiene un límite: 5 horas de uso, límites semanales o límites de tarifa por minuto. A mitad de la sesión de codificación, el proveedor deja de responder y el desarrollador pierde flujo y productividad.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Reserva inteligente de 4 niveles**: si se agota la cuota de suscripción, se redirige automáticamente a la clave API → Barato → Gratis sin intervención manual
|
||||
- **Seguimiento de cuotas en tiempo real**: muestra el consumo de tokens en tiempo real con cuenta regresiva de reinicio (5 h, diario, semanal)
|
||||
- **Soporte multicuenta**: varias cuentas por proveedor con rotación automática: cuando una se agota, cambia a la siguiente
|
||||
- **Combinaciones personalizadas**: cadenas de respaldo personalizables con 6 estrategias de equilibrio (completar primero, por turnos, P2C, aleatoria, menos utilizada, de costo optimizado)
|
||||
- **Cuotas comerciales de Codex**: monitoreo de cuotas del espacio de trabajo empresarial/de equipo directamente en el panel
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔌 2. "Necesito usar varios proveedores pero cada uno tiene una API diferente"</b></summary>
|
||||
|
||||
OpenAI usa un formato, Claude (Anthropic) usa otro, Gemini otro más. Si un desarrollador quiere probar modelos de diferentes proveedores o recurrir a ellos, debe reconfigurar los SDK, cambiar los puntos finales y lidiar con formatos incompatibles. Los proveedores personalizados (FriendLI, NIM) tienen puntos finales de modelo no estándar.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Punto final unificado**: un único `http://localhost:20128/v1` sirve como proxy para los más de 36 proveedores
|
||||
- **Traducción de formato**: automática y transparente: OpenAI ↔ Claude ↔ Gemini ↔ API de respuestas
|
||||
- **Desinfección de respuesta**: elimina los campos no estándar (`x_groq`, `usage_breakdown`, `service_tier`) que interrumpen OpenAI SDK v1.83+
|
||||
- **Normalización de roles**: convierte `developer` → `system` para proveedores que no son OpenAI; `system` → `user` para GLM/ERNIE
|
||||
- **Think Tag Extraction**: extrae bloques `<think>` de modelos como DeepSeek R1 en `reasoning_content` estandarizado.
|
||||
- **Salida estructurada para Gemini** — `json_schema` → `responseMimeType`/`responseSchema` conversión automática
|
||||
- **`stream` por defecto es `false`**: se alinea con las especificaciones de OpenAI, evitando SSE inesperado en los SDK de Python/Rust/Go
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🌐 3. "Mi proveedor de IA bloquea mi región/país"</b></summary>
|
||||
|
||||
Proveedores como OpenAI/Codex bloquean el acceso desde ciertas regiones geográficas. Los usuarios obtienen errores como `unsupported_country_region_territory` durante las conexiones OAuth y API. Esto resulta especialmente frustrante para los desarrolladores de los países en desarrollo.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Configuración de proxy de 3 niveles**: Proxy configurable en 3 niveles: global (todo el tráfico), por proveedor (un solo proveedor) y por conexión/clave.
|
||||
- **Insignias de proxy codificadas por colores** — Indicadores visuales: 🟢 proxy global, 🟡 proxy de proveedor, 🔵 proxy de conexión, que siempre muestra la IP
|
||||
- **Intercambio de tokens de OAuth a través de proxy**: el flujo de OAuth también pasa a través del proxy, lo que resuelve `unsupported_country_region_territory`
|
||||
- **Pruebas de conexión a través de proxy**: las pruebas de conexión utilizan el proxy configurado (no más derivación directa)
|
||||
- **Soporte SOCKS5**: soporte completo de proxy SOCKS5 para enrutamiento saliente
|
||||
- **Suplantación de huellas dactilares TLS**: huella digital TLS similar a la de un navegador a través de `wreq-js` para evitar la detección de bots.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🆓 4. "Quiero usar IA para codificar pero no tengo dinero"</b></summary>
|
||||
|
||||
No todo el mundo puede pagar entre 20 y 200 dólares al mes por suscripciones a IA. Los estudiantes, desarrolladores de países emergentes, aficionados y autónomos necesitan acceso a modelos de calidad sin costo alguno.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Proveedores de nivel gratuito integrados**: soporte nativo para proveedores 100% gratuitos: iFlow (8 modelos ilimitados), Qwen (3 modelos ilimitados), Kiro (Claude gratis), Gemini CLI (180K/mes gratis)
|
||||
- **Ollama Cloud** — Cloud-hosted Ollama models at `api.ollama.com` with free "Light usage" tier; use `ollamacloud/<model>` prefix
|
||||
- **Combos solo gratuitos**: cadena `gc/gemini-3-flash → if/kimi-k2-thinking → qw/qwen3-coder-plus` = $0/mes sin tiempo de inactividad
|
||||
- **Créditos gratuitos NVIDIA NIM**: 1000 créditos gratuitos integrados
|
||||
- **Estrategia de optimización de costos**: estrategia de enrutamiento que elige automáticamente el proveedor más barato disponible
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔒 5. "Necesito proteger mi puerta de enlace AI del acceso no autorizado"</b></summary>
|
||||
|
||||
Al exponer una puerta de enlace de IA a la red (LAN, VPS, Docker), cualquiera con la dirección puede consumir los tokens/cuota del desarrollador. Sin protección, las API son vulnerables al mal uso, la inyección rápida y el abuso.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Administración de claves API**: generación, rotación y alcance por proveedor con una página `/dashboard/api-manager` dedicada
|
||||
- **Permisos a nivel de modelo**: restrinja las claves API a modelos específicos (`openai/*`, patrones comodín), con la opción Permitir todo/Restringir
|
||||
- **API Endpoint Protection**: requiere una clave para `/v1/models` y bloquea proveedores específicos del listado
|
||||
- **Auth Guard + Protección CSRF**: todas las rutas del panel protegidas con middleware `withAuth` + tokens CSRF
|
||||
- **Limitador de velocidad**: limitación de velocidad por IP con ventanas configurables
|
||||
- **Filtrado de IP**: lista permitida/lista bloqueada para control de acceso
|
||||
- **Prompt injection guard**: desinfección contra patrones de avisos maliciosos
|
||||
- **Cifrado AES-256-GCM**: credenciales cifradas en reposo
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🛑 6. "Mi proveedor dejó de funcionar y perdí mi flujo de codificación"</b></summary>
|
||||
|
||||
Los proveedores de IA pueden volverse inestables, devolver errores 5xx o alcanzar límites de velocidad temporales. Si un desarrollador depende de un solo proveedor, se le interrumpe. Sin disyuntores, los reintentos repetidos pueden bloquear la aplicación.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Disyuntor por proveedor**: apertura/cierre automático con umbrales configurables y enfriamiento (cerrado/abierto/medio abierto)
|
||||
- **Retroceso exponencial**: retrasos progresivos en los reintentos
|
||||
- **Anti-Thundering Herd** — Mutex + protección de semáforo contra tormentas de reintentos simultáneos
|
||||
- **Cadenas alternativas combinadas**: si el proveedor principal falla, automáticamente pasa por la cadena sin intervención.
|
||||
- **Disyuntor combinado**: desactiva automáticamente los proveedores defectuosos dentro de una cadena combinada
|
||||
- **Panel de estado**: monitoreo del tiempo de actividad, estados de disyuntores, bloqueos, estadísticas de caché, latencia p50/p95/p99
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔧 7. "Configurar cada herramienta de IA es tedioso y repetitivo"</b></summary>
|
||||
|
||||
Los desarrolladores utilizan Cursor, Claude Code, Codex CLI, OpenClaw, Gemini CLI, Kilo Code... Cada herramienta necesita una configuración diferente (punto final API, clave, modelo). Reconfigurar al cambiar de proveedor o modelo es una pérdida de tiempo.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Panel de herramientas CLI**: página dedicada con configuración con un solo clic para Claude Code, Codex CLI, OpenClaw, Kilo Code, Antigravity, Cline
|
||||
- **Generador de configuración de GitHub Copilot**: genera `chatLanguageModels.json` para código VS con selección de modelo masivo
|
||||
- **Asistente de incorporación**: configuración guiada de 4 pasos para usuarios nuevos
|
||||
- **Un punto final, todos los modelos**: configure `http://localhost:20128/v1` una vez, acceda a más de 36 proveedores
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔑 8. "Administrar tokens OAuth de múltiples proveedores es un infierno"</b></summary>
|
||||
|
||||
Claude Code, Codex, Gemini CLI, Copilot: todos usan OAuth 2.0 con tokens que caducan. Los desarrolladores necesitan volver a autenticarse constantemente, lidiar con `client_secret is missing`, `redirect_uri_mismatch` y fallas en servidores remotos. OAuth en LAN/VPS es particularmente problemático.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Actualización automática de tokens**: los tokens de OAuth se actualizan en segundo plano antes de que caduquen
|
||||
- **OAuth 2.0 (PKCE) integrado**: flujo automático para Claude Code, Codex, Gemini CLI, Copilot, Kiro, Qwen, iFlow
|
||||
- **OAuth multicuenta**: varias cuentas por proveedor mediante extracción de token JWT/ID
|
||||
- **OAuth LAN/Remote Fix** — Detección de IP privada para `redirect_uri` + modo URL manual para servidores remotos
|
||||
- **OAuth detrás de Nginx**: utiliza `window.location.origin` para compatibilidad con proxy inverso
|
||||
- **Guía remota de OAuth**: guía paso a paso para las credenciales de Google Cloud en VPS/Docker
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>📊 9. "No sé cuánto estoy gastando ni dónde"</b></summary>
|
||||
|
||||
Los desarrolladores utilizan múltiples proveedores pagos pero no tienen una visión unificada del gasto. Cada proveedor tiene su propio panel de facturación, pero no hay una vista consolidada. Los costos inesperados pueden acumularse.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Panel de análisis de costos**: seguimiento de costos por token y gestión de presupuesto por proveedor
|
||||
- **Límites de presupuesto por nivel**: límite de gasto por nivel que activa el respaldo automático
|
||||
- **Configuración de precios por modelo**: precios configurables por modelo
|
||||
- **Estadísticas de uso por clave API**: recuento de solicitudes y marca de tiempo utilizada por última vez por clave
|
||||
- **Panel de análisis**: tarjetas de estadísticas, tabla de uso de modelos, tabla de proveedores con tasas de éxito y latencia.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🐛 10. "No puedo diagnosticar errores y problemas en llamadas AI"</b></summary>
|
||||
|
||||
Cuando falla una llamada, el desarrollador no sabe si se trata de un límite de velocidad, un token caducado, un formato incorrecto o un error del proveedor. Registros fragmentados en diferentes terminales. Sin observabilidad, la depuración es de prueba y error.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Panel de registros unificados**: 4 pestañas: registros de solicitudes, registros de proxy, registros de auditoría y consola
|
||||
- **Visor de registros de consola**: visor estilo terminal en tiempo real con niveles codificados por colores, desplazamiento automático, búsqueda y filtro
|
||||
- **Registros de proxy SQLite**: registros persistentes que sobreviven a los reinicios del servidor
|
||||
- **Translator Playground**: 4 modos de depuración: Playground (traducción de formato), Chat Tester (ida y vuelta), Test Bench (por lotes), Live Monitor (en tiempo real)
|
||||
- **Solicitud de telemetría**: latencia p50/p95/p99 + seguimiento de X-Request-Id
|
||||
- **Registro basado en archivos con rotación**: el interceptor de consola captura todo en el registro JSON con rotación basada en el tamaño.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🏗️ 11. "Implementar y mantener la puerta de enlace es complejo"</b></summary>
|
||||
|
||||
Instalar, configurar y mantener un proxy de IA en diferentes entornos (local, VPS, Docker, nube) requiere mucha mano de obra. Problemas como rutas codificadas, `EACCES` en directorios, conflictos de puertos y compilaciones multiplataforma añaden fricción.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **instalación global de npm** — `npm install -g omniroute && omniroute` — hecho
|
||||
- **Docker multiplataforma**: AMD64 + ARM64 nativo (Apple Silicon, AWS Graviton, Raspberry Pi)
|
||||
- **Perfiles de Docker Compose**: `base` (sin herramientas CLI) y `cli` (con Claude Code, Codex, OpenClaw)
|
||||
- **Aplicación de escritorio Electron**: aplicación nativa para Windows/macOS/Linux con bandeja del sistema, inicio automático y modo sin conexión
|
||||
- **Modo de puerto dividido**: API y panel en puertos separados para escenarios avanzados (proxy inverso, redes de contenedores)
|
||||
- **Cloud Sync**: sincronización de configuración entre dispositivos a través de Cloudflare Workers
|
||||
- **Copias de seguridad de base de datos**: copia de seguridad, restauración, exportación e importación automáticas de todas las configuraciones
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🌍 12. "La interfaz es solo en inglés y mi equipo no habla inglés"</b></summary>
|
||||
|
||||
Los equipos en países que no hablan inglés, especialmente en América Latina, Asia y Europa, tienen dificultades con las interfaces solo en inglés. Las barreras del idioma reducen la adopción y aumentan los errores de configuración.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Panel i18n — 30 idiomas** — Las más de 500 teclas traducidas, incluidas árabe, búlgaro, danés, alemán, español, finlandés, francés, hebreo, hindi, húngaro, indonesio, italiano, japonés, coreano, malayo, holandés, noruego, polaco, portugués (PT/BR), rumano, ruso, eslovaco, sueco, tailandés, ucraniano, vietnamita, chino, filipino, inglés.
|
||||
- **Soporte RTL**: soporte de derecha a izquierda para árabe y hebreo
|
||||
- **README multilingüe**: 30 traducciones de documentación completa
|
||||
- **Selector de idioma**: ícono de globo en el encabezado para cambiar en tiempo real
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔄 13. "Necesito más que chat: necesito incrustaciones, imágenes y audio"</b></summary>
|
||||
|
||||
La IA no es solo completar un chat. Los desarrolladores necesitan generar imágenes, transcribir audio, crear incrustaciones para RAG, reclasificar documentos y moderar contenido. Cada API tiene un punto final y un formato diferentes.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Integraciones** — `/v1/embeddings` con 6 proveedores y más de 9 modelos
|
||||
- **Generación de imágenes** — `/v1/images/generations` con 10 proveedores y más de 20 modelos (OpenAI, xAI, Together, Fireworks, Nebius, Hyperbolic, NanoBanana, Antigravity, SD WebUI, ComfyUI)
|
||||
- **Texto a vídeo** — `/v1/videos/generations` — ComfyUI (AnimateDiff, SVD) y SD WebUI
|
||||
- **Texto a música** — `/v1/music/generations` — ComfyUI (audio estable abierto, MusicGen)
|
||||
- **Transcripción de audio** — `/v1/audio/transcriptions` — Whisper + Nvidia NIM, HuggingFace, Qwen3
|
||||
- **Texto a voz** — `/v1/audio/speech` — ElevenLabs, Nvidia NIM, HuggingFace, Coqui, Tortoise, Qwen3, Inworld, Cartesia, PlayHT y proveedores existentes
|
||||
- **Moderaciones** — `/v1/moderations` — Comprobaciones de seguridad del contenido
|
||||
- **Reclasificación** — `/v1/rerank` — Reclasificación de relevancia del documento
|
||||
- **API de respuestas**: compatibilidad total con `/v1/responses` para Codex
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🧪 14. "No tengo forma de probar y comparar la calidad entre modelos"</b></summary>
|
||||
|
||||
Los desarrolladores quieren saber qué modelo es mejor para su caso de uso (código, traducción, razonamiento), pero comparar manualmente es lento. No existen herramientas de evaluación integradas.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Evaluaciones LLM**: pruebas de conjunto dorado con 10 casos precargados que cubren saludos, matemáticas, geografía, generación de código, cumplimiento de JSON, traducción, rebajas y rechazo de seguridad.
|
||||
- **4 estrategias de coincidencia**: `exact`, `contains`, `regex`, `custom` (función JS)
|
||||
- **Translator Playground Test Bench**: pruebas por lotes con múltiples entradas y resultados esperados, comparación entre proveedores
|
||||
- **Chat Tester**: recorrido completo de ida y vuelta con representación de respuesta visual
|
||||
- **Live Monitor**: flujo en tiempo real de todas las solicitudes que fluyen a través del proxy
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>📈 15. "Necesito escalar sin perder rendimiento"</b></summary>
|
||||
|
||||
A medida que crece el volumen de solicitudes, sin almacenar en caché las mismas preguntas generan costos duplicados. Sin idempotencia, las solicitudes duplicadas desperdician el procesamiento. Se deben respetar los límites de tarifas por proveedor.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Caché semántica**: la caché de dos niveles (firma + semántica) reduce el costo y la latencia
|
||||
- **Solicitud de idempotencia**: ventana de deduplicación de 5 segundos para solicitudes idénticas
|
||||
- **Detección de límite de velocidad**: RPM por proveedor, intervalo mínimo y seguimiento simultáneo máximo
|
||||
- **Límites de velocidad editables**: valores predeterminados configurables en Configuración → Resiliencia con persistencia
|
||||
- **Caché de validación de clave API**: caché de 3 niveles para rendimiento de producción
|
||||
- **Panel de estado con telemetría**: latencia p50/p95/p99, estadísticas de caché, tiempo de actividad
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🤖 16. "Quiero controlar el comportamiento del modelo globalmente"</b></summary>
|
||||
|
||||
Desarrolladores que quieran todas las respuestas en un idioma específico, con un tono específico o quieran limitar los tokens de razonamiento. Configurar esto en cada herramienta/solicitud no es práctico.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- **Inyección de aviso del sistema**: aviso global aplicado a todas las solicitudes
|
||||
- **Thinking Budget Validation**: control de asignación de tokens de razonamiento por solicitud (transferencia, automática, personalizada, adaptativa)
|
||||
- **6 estrategias de enrutamiento**: estrategias globales que determinan cómo se distribuyen las solicitudes
|
||||
- **Enrutador comodín**: los patrones `provider/*` se enrutan dinámicamente a cualquier proveedor
|
||||
- **Activar/desactivar combinación de alternar**: alterna combinaciones directamente desde el panel
|
||||
- **Alternar proveedor**: activa/desactiva todas las conexiones de un proveedor con un solo clic
|
||||
- **Proveedores bloqueados**: excluye proveedores específicos del listado `/v1/models`
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🧰 17. "Necesito herramientas MCP como capacidades de producto de primera clase"</b></summary>
|
||||
|
||||
Muchas puertas de enlace de IA exponen MCP solo como un detalle de implementación oculto. Los equipos necesitan una capa operativa visible y manejable.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- MCP aparece en la pestaña de navegación del panel y protocolo de punto final
|
||||
- Página de gestión de MCP dedicada con procesos, herramientas, alcances y auditoría
|
||||
- Inicio rápido integrado para `omniroute --mcp` e incorporación de clientes
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🧠 18. "Necesito orquestación A2A con rutas de tareas de sincronización + transmisión"</b></summary>
|
||||
|
||||
Los flujos de trabajo de los agentes necesitan respuestas directas y una ejecución continua de larga duración con control del ciclo de vida.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Punto final A2A JSON-RPC (`POST /a2a`) con `message/send` y `message/stream`
|
||||
- Transmisión SSE con propagación del estado terminal
|
||||
- API de ciclo de vida de tareas para `tasks/get` y `tasks/cancel`
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🛰️ 19. "Necesito un estado real del proceso MCP, no un estado adivinado"</b></summary>
|
||||
|
||||
Los equipos operativos necesitan saber si MCP está realmente activo, no solo si se puede acceder a una API.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Archivo de latidos en tiempo de ejecución con PID, marcas de tiempo, transporte, recuento de herramientas y modo de alcance
|
||||
- API de estado de MCP que combina latidos + actividad reciente
|
||||
- Tarjetas de estado de la interfaz de usuario para el proceso/tiempo de actividad/actualización de latidos
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>📋 20. "Necesito ejecución de herramienta MCP auditable"</b></summary>
|
||||
|
||||
Cuando las herramientas modifican la configuración o desencadenan acciones de operaciones, los equipos necesitan trazabilidad forense.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Registro de auditoría respaldado por SQLite para llamadas a herramientas MCP
|
||||
- Filtros por herramienta, éxito/fracaso, clave API y paginación
|
||||
- Tabla de auditoría del panel + puntos finales de estadísticas para automatización
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔐 21. "Necesito permisos MCP con alcance por integración"</b></summary>
|
||||
|
||||
Los diferentes clientes deberían tener acceso con privilegios mínimos a las categorías de herramientas.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- 9 alcances MCP granulares para acceso controlado a herramientas
|
||||
- Aplicación del alcance y visibilidad en la interfaz de usuario de gestión de MCP
|
||||
- Postura predeterminada segura para herramientas operativas
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>⚙️ 22. "Necesito controles operativos sin redistribuir"</b></summary>
|
||||
|
||||
Los equipos necesitan cambios rápidos en el tiempo de ejecución durante incidentes o eventos de costos.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Cambie la activación combinada directamente desde el panel de MCP
|
||||
- Aplicar perfiles de resiliencia de paquetes de políticas predefinidos
|
||||
- Restablecer el estado del disyuntor desde el mismo panel de operaciones.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🔄 23. "Necesito visibilidad y cancelación del ciclo de vida de la tarea A2A en vivo"</b></summary>
|
||||
|
||||
Sin visibilidad del ciclo de vida, los incidentes de tareas se vuelven difíciles de clasificar.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Listado de tareas/filtrado por estado/habilidad con paginación
|
||||
- Profundización en metadatos, eventos y artefactos de tareas
|
||||
- Punto final de cancelación de tarea y acción de UI con confirmación
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🌊 24. "Necesito métricas de transmisión activas para la carga A2A"</b></summary>
|
||||
|
||||
Los flujos de trabajo de streaming requieren información operativa sobre la simultaneidad y las conexiones en vivo.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Contadores de flujo activos integrados en el estado A2A
|
||||
- Marca de tiempo de la última tarea y recuentos por estado
|
||||
- Tarjetas de tablero A2A para monitoreo de operaciones en tiempo real
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🪪 25. "Necesito descubrimiento de agente estándar para clientes"</b></summary>
|
||||
|
||||
Los clientes y orquestadores externos necesitan metadatos legibles por máquina para la incorporación.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Tarjeta de agente expuesta en `/.well-known/agent.json`
|
||||
- Capacidades y habilidades mostradas en la interfaz de usuario de gestión.
|
||||
- La API de estado A2A incluye metadatos de descubrimiento para la automatización
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🧭 26. "Necesito capacidad de descubrimiento de protocolo en la UX del producto"</b></summary>
|
||||
|
||||
Si los usuarios no pueden descubrir las superficies de protocolo, la calidad de la adopción y el soporte disminuye.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Entradas de la barra lateral para MCP y A2A
|
||||
- Pestaña Protocolos de la página del endpoint con inicio rápido y estado
|
||||
- Enlaces desde la descripción general a paneles de gestión dedicados
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🧪 27. "Necesito validación de protocolo de extremo a extremo con clientes reales"</b></summary>
|
||||
|
||||
Las pruebas simuladas no son suficientes para validar la compatibilidad del protocolo antes del lanzamiento.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Suite E2E que inicia la aplicación y utiliza transporte de cliente MCP SDK real
|
||||
- Pruebas de cliente A2A para descubrimiento, envío, transmisión, obtención y cancelación de flujos
|
||||
- Verificar las afirmaciones con las API de auditoría MCP y tareas A2A.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>📡 28. "Necesito observabilidad unificada en todas las interfaces"</b></summary>
|
||||
|
||||
Dividir la observabilidad por protocolo crea puntos ciegos y MTTR más largos.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Paneles/registros/análisis unificados en un solo producto
|
||||
- Salud + auditoría + solicitud de telemetría en capas OpenAI, MCP y A2A
|
||||
- API operativas para estado y automatización.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>💼 29. "Necesito un tiempo de ejecución para proxy + herramientas + orquestación de agentes"</b></summary>
|
||||
|
||||
La ejecución de muchos servicios separados aumenta los costos operativos y los modos de falla.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Proxy compatible con OpenAI, servidor MCP y servidor A2A en una sola pila
|
||||
- Autenticación compartida, resiliencia, almacenamiento de datos y observabilidad.
|
||||
- Modelo de política consistente en todas las superficies de interacción.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>🚀 30. "Necesito enviar flujos de trabajo agentes sin expansión de código adhesivo"</b></summary>
|
||||
|
||||
Los equipos pierden velocidad al unir múltiples scripts y servicios ad hoc.
|
||||
|
||||
**Cómo lo resuelve OmniRoute:**
|
||||
|
||||
- Estrategia de endpoint unificada para clientes y agentes
|
||||
- UI de gestión de protocolos integradas y rutas de validación de humo
|
||||
- Fundamentos listos para producción (seguridad, registro, resiliencia, respaldo)
|
||||
|
||||
</details>
|
||||
|
||||
### Guías de ejemplo (casos de uso integrados)
|
||||
|
||||
**Libro de estrategias A: maximizar la suscripción paga + copia de seguridad económica**
|
||||
|
||||
```txt
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6
|
||||
2. glm/glm-4.7
|
||||
3. if/kimi-k2-thinking
|
||||
|
||||
Monthly cost: $20 + small backup spend
|
||||
Outcome: higher quality, near-zero interruption
|
||||
```
|
||||
|
||||
**Libro de estrategias B: pila de codificación de costo cero**
|
||||
|
||||
```txt
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash
|
||||
2. if/kimi-k2-thinking
|
||||
3. qw/qwen3-coder-plus
|
||||
|
||||
Monthly cost: $0
|
||||
Outcome: stable free coding workflow
|
||||
```
|
||||
|
||||
**Libro de estrategias C: cadena alternativa siempre disponible las 24 horas del día, los 7 días de la semana**
|
||||
|
||||
```txt
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6
|
||||
2. cx/gpt-5.2-codex
|
||||
3. glm/glm-4.7
|
||||
4. minimax/MiniMax-M2.1
|
||||
5. if/kimi-k2-thinking
|
||||
|
||||
Outcome: deep fallback depth for deadline-critical workloads
|
||||
```
|
||||
|
||||
**Libro de jugadas D: Operaciones del agente con MCP + A2A**
|
||||
|
||||
```txt
|
||||
1) Start MCP transport (`omniroute --mcp`) for tool-driven operations
|
||||
2) Run A2A tasks via `message/send` and `message/stream`
|
||||
3) Observe via /dashboard/mcp and /dashboard/a2a
|
||||
4) Control incidents with resilience profile + task cancellation
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Inicio Rápido
|
||||
|
||||
**1. Instala globalmente:**
|
||||
@@ -247,6 +792,34 @@ docker compose --profile cli up -d
|
||||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
## 🖥️
|
||||
|
||||
> 🆕 **¡NUEVO!** OmniRoute ahora está disponible como **aplicación de escritorio nativa** para Windows, macOS y Linux.
|
||||
|
||||
Ejecuta OmniRoute como una aplicación de escritorio autónoma — sin terminal, sin navegador, sin internet necesario para modelos locales. La app basada en Electron incluye:
|
||||
|
||||
- 🖥️ **Ventana Nativa** — Ventana dedicada con integración en la bandeja del sistema
|
||||
- 🔄 **Inicio Automático** — Inicia OmniRoute al iniciar sesión
|
||||
- 🔔 **Notificaciones Nativas** — Recibe alertas sobre cuota o problemas de proveedores
|
||||
- ⚡ **Instalación con Un Clic** — NSIS (Windows), DMG (macOS), AppImage (Linux)
|
||||
- 🌐 **Modo Sin Conexión** — Funciona completamente offline con servidor incluido
|
||||
|
||||
### Inicio Rápido
|
||||
|
||||
```bash
|
||||
npm run electron:dev # Modo desarrollo
|
||||
npm run electron:build # Plataforma actual
|
||||
npm run electron:build:win # Windows (.exe)
|
||||
npm run electron:build:mac # macOS (.dmg)
|
||||
npm run electron:build:linux # Linux (.AppImage)
|
||||
```
|
||||
|
||||
📖 Documentación completa: [`electron/README.md`](electron/README.md)
|
||||
|
||||
---
|
||||
|
||||
## 💰 Precios Resumidos
|
||||
|
||||
| Tier | Proveedor | Costo | Reset de Cuota | Mejor Para |
|
||||
@@ -272,67 +845,6 @@ docker compose --profile cli up -d
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Casos de Uso
|
||||
|
||||
### Caso 1: "Tengo suscripción Claude Pro"
|
||||
|
||||
**Problema:** La cuota expira sin usar, límites de tasa durante programación intensa
|
||||
|
||||
```
|
||||
Combo: "maximize-claude"
|
||||
1. cc/claude-opus-4-6 (usar suscripción al máximo)
|
||||
2. glm/glm-4.7 (respaldo barato cuando la cuota se agota)
|
||||
3. if/kimi-k2-thinking (fallback de emergencia gratuito)
|
||||
|
||||
Costo mensual: $20 (suscripción) + ~$5 (respaldo) = $25 total
|
||||
vs. $20 + chocar con límites = frustración
|
||||
```
|
||||
|
||||
### Caso 2: "Quiero costo cero"
|
||||
|
||||
**Problema:** No puede pagar suscripciones, necesita IA confiable para programar
|
||||
|
||||
```
|
||||
Combo: "free-forever"
|
||||
1. gc/gemini-3-flash (180K gratis/mes)
|
||||
2. if/kimi-k2-thinking (ilimitado gratis)
|
||||
3. qw/qwen3-coder-plus (ilimitado gratis)
|
||||
|
||||
Costo mensual: $0
|
||||
Calidad: Modelos listos para producción
|
||||
```
|
||||
|
||||
### Caso 3: "Necesito programar 24/7, sin interrupciones"
|
||||
|
||||
**Problema:** Plazos ajustados, no puede permitirse tiempo de inactividad
|
||||
|
||||
```
|
||||
Combo: "always-on"
|
||||
1. cc/claude-opus-4-6 (mejor calidad)
|
||||
2. cx/gpt-5.2-codex (segunda suscripción)
|
||||
3. glm/glm-4.7 (barato, reset diario)
|
||||
4. minimax/MiniMax-M2.1 (más barato, reset 5h)
|
||||
5. if/kimi-k2-thinking (gratuito ilimitado)
|
||||
|
||||
Resultado: 5 capas de fallback = cero tiempo de inactividad
|
||||
```
|
||||
|
||||
### Caso 4: "Quiero IA GRATUITA en OpenClaw"
|
||||
|
||||
**Problema:** Necesita asistente de IA en apps de mensajería, completamente gratuito
|
||||
|
||||
```
|
||||
Combo: "openclaw-free"
|
||||
1. if/glm-4.7 (ilimitado gratis)
|
||||
2. if/minimax-m2.1 (ilimitado gratis)
|
||||
3. if/kimi-k2-thinking (ilimitado gratis)
|
||||
|
||||
Costo mensual: $0
|
||||
Acceso vía: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 💡 Características Principales
|
||||
|
||||
### 🧠 Enrutamiento e Inteligencia
|
||||
@@ -348,6 +860,8 @@ Acceso vía: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
| 🧩 **Modelos Personalizados** | Agrega cualquier ID de modelo a cualquier proveedor |
|
||||
| 🌐 **Enrutador Wildcard** | Enruta patrones `provider/*` a cualquier proveedor dinámicamente |
|
||||
| 🧠 **Presupuesto de Razonamiento** | Modos passthrough, auto, custom y adaptativo para modelos de razonamiento |
|
||||
| 🔀 **Model Aliases** | Auto-forward deprecated model IDs to current replacements (built-in + custom) |
|
||||
| ⚡ **Background Degradation** | Auto-route background tasks (titles, summaries) to cheaper models |
|
||||
| 💬 **Inyección de System Prompt** | System prompt global aplicado en todas las solicitudes |
|
||||
| 📄 **API Responses** | Soporte completo de la API Responses de OpenAI (`/v1/responses`) para Codex |
|
||||
|
||||
@@ -364,15 +878,18 @@ Acceso vía: WhatsApp, Telegram, Slack, Discord, iMessage, Signal...
|
||||
|
||||
### 🛡️ Resiliencia y Seguridad
|
||||
|
||||
| Característica | Qué Hace |
|
||||
| ---------------------------------- | ---------------------------------------------------------------- |
|
||||
| 🔌 **Circuit Breaker** | Auto-apertura/cierre por proveedor con umbrales configurables |
|
||||
| 🛡️ **Anti-Thundering Herd** | Mutex + semáforo rate-limit para proveedores con API key |
|
||||
| 🧠 **Caché Semántico** | Caché de dos niveles (firma + semántico) reduce costo y latencia |
|
||||
| ⚡ **Idempotencia de Solicitud** | Ventana de dedup de 5s para solicitudes duplicadas |
|
||||
| 🔒 **Spoofing de Fingerprint TLS** | Bypass de detección de bot vía TLS con wreq-js |
|
||||
| 🌐 **Filtrado de IP** | Allowlist/blocklist para control de acceso a la API |
|
||||
| 📊 **Rate Limits Editables** | RPM, gap mínimo y concurrencia máxima configurables |
|
||||
| Característica | Qué Hace |
|
||||
| ---------------------------------- | ---------------------------------------------------------------------------- |
|
||||
| 🔌 **Circuit Breaker** | Auto-apertura/cierre por proveedor con umbrales configurables |
|
||||
| 🎯 **Endpoint-Aware Models** | Custom models declare supported endpoints + API format |
|
||||
| 🛡️ **Anti-Thundering Herd** | Mutex + semáforo rate-limit para proveedores con API key |
|
||||
| 🧠 **Caché Semántico** | Caché de dos niveles (firma + semántico) reduce costo y latencia |
|
||||
| ⚡ **Idempotencia de Solicitud** | Ventana de dedup de 5s para solicitudes duplicadas |
|
||||
| 🔒 **Spoofing de Fingerprint TLS** | Bypass de detección de bot vía TLS con wreq-js |
|
||||
| 🌐 **Filtrado de IP** | Allowlist/blocklist para control de acceso a la API |
|
||||
| 📊 **Rate Limits Editables** | RPM, gap mínimo y concurrencia máxima configurables |
|
||||
| 💾 **Rate Limit Persistence** | Learned limits survive restarts via SQLite with 60s debounce + 24h staleness |
|
||||
| 🔄 **Token Refresh Resilience** | Per-provider circuit breaker (5 fails→30min) + 30s timeout per attempt |
|
||||
|
||||
### 📊 Observabilidad y Analytics
|
||||
|
||||
@@ -472,6 +989,27 @@ Traducción transparente entre formatos:
|
||||
|
||||
</details>
|
||||
|
||||
## 🧪 Evaluaciones (Evals)
|
||||
|
||||
OmniRoute incluye un framework de evaluación integrado para probar la calidad de respuestas de LLM contra un conjunto golden. Accede vía **Analytics → Evals** en el dashboard.
|
||||
|
||||
### Conjunto Golden Integrado
|
||||
|
||||
El "OmniRoute Golden Set" precargado contiene 10 casos de prueba que cubren:
|
||||
|
||||
- Saludos, matemáticas, geografía, generación de código
|
||||
- Conformidad de formato JSON, traducción, markdown
|
||||
- Rechazo de seguridad (contenido dañino), conteo, lógica booleana
|
||||
|
||||
### Estrategias de Evaluación
|
||||
|
||||
| Estrategia | Descripción | Ejemplo |
|
||||
| ---------- | ---------------------------------------------------- | -------------------------------- |
|
||||
| `exact` | La salida debe coincidir exactamente | `"4"` |
|
||||
| `contains` | La salida debe contener subcadena (case-insensitive) | `"Paris"` |
|
||||
| `regex` | La salida debe coincidir con el patrón regex | `"1.*2.*3"` |
|
||||
| `custom` | Función JS personalizada retorna true/false | `(output) => output.length > 10` |
|
||||
|
||||
---
|
||||
|
||||
## 📖 Guía de Configuración
|
||||
@@ -754,97 +1292,6 @@ Configuración → Configuración de API:
|
||||
|
||||
---
|
||||
|
||||
## 📊 Modelos Disponibles
|
||||
|
||||
<details>
|
||||
<summary><b>Ver todos los modelos disponibles</b></summary>
|
||||
|
||||
**Claude Code (`cc/`)** - Pro/Max:
|
||||
|
||||
- `cc/claude-opus-4-6`
|
||||
- `cc/claude-sonnet-4-5-20250929`
|
||||
- `cc/claude-haiku-4-5-20251001`
|
||||
|
||||
**Codex (`cx/`)** - Plus/Pro:
|
||||
|
||||
- `cx/gpt-5.2-codex`
|
||||
- `cx/gpt-5.1-codex-max`
|
||||
|
||||
**Gemini CLI (`gc/`)** - GRATUITO:
|
||||
|
||||
- `gc/gemini-3-flash-preview`
|
||||
- `gc/gemini-2.5-pro`
|
||||
|
||||
**GitHub Copilot (`gh/`)**:
|
||||
|
||||
- `gh/gpt-5`
|
||||
- `gh/claude-4.5-sonnet`
|
||||
|
||||
**NVIDIA NIM (`nvidia/`)** - Créditos GRATUITOS:
|
||||
|
||||
- `nvidia/llama-3.3-70b-instruct`
|
||||
- `nvidia/mistral-7b-instruct`
|
||||
- 50+ más modelos en [build.nvidia.com](https://build.nvidia.com)
|
||||
|
||||
**GLM (`glm/`)** - $0.6/1M:
|
||||
|
||||
- `glm/glm-4.7`
|
||||
|
||||
**MiniMax (`minimax/`)** - $0.2/1M:
|
||||
|
||||
- `minimax/MiniMax-M2.1`
|
||||
|
||||
**iFlow (`if/`)** - GRATUITO:
|
||||
|
||||
- `if/kimi-k2-thinking`
|
||||
- `if/qwen3-coder-plus`
|
||||
- `if/deepseek-r1`
|
||||
- `if/glm-4.7`
|
||||
- `if/minimax-m2`
|
||||
|
||||
**Qwen (`qw/`)** - GRATUITO:
|
||||
|
||||
- `qw/qwen3-coder-plus`
|
||||
- `qw/qwen3-coder-flash`
|
||||
|
||||
**Kiro (`kr/`)** - GRATUITO:
|
||||
|
||||
- `kr/claude-sonnet-4.5`
|
||||
- `kr/claude-haiku-4.5`
|
||||
|
||||
**OpenRouter (`or/`)** - 100+ modelos:
|
||||
|
||||
- `or/anthropic/claude-4-sonnet`
|
||||
- `or/google/gemini-2.5-pro`
|
||||
- Cualquier modelo de [openrouter.ai/models](https://openrouter.ai/models)
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Evaluaciones (Evals)
|
||||
|
||||
OmniRoute incluye un framework de evaluación integrado para probar la calidad de respuestas de LLM contra un conjunto golden. Accede vía **Analytics → Evals** en el dashboard.
|
||||
|
||||
### Conjunto Golden Integrado
|
||||
|
||||
El "OmniRoute Golden Set" precargado contiene 10 casos de prueba que cubren:
|
||||
|
||||
- Saludos, matemáticas, geografía, generación de código
|
||||
- Conformidad de formato JSON, traducción, markdown
|
||||
- Rechazo de seguridad (contenido dañino), conteo, lógica booleana
|
||||
|
||||
### Estrategias de Evaluación
|
||||
|
||||
| Estrategia | Descripción | Ejemplo |
|
||||
| ---------- | ---------------------------------------------------- | -------------------------------- |
|
||||
| `exact` | La salida debe coincidir exactamente | `"4"` |
|
||||
| `contains` | La salida debe contener subcadena (case-insensitive) | `"Paris"` |
|
||||
| `regex` | La salida debe coincidir con el patrón regex | `"1.*2.*3"` |
|
||||
| `custom` | Función JS personalizada retorna true/false | `(output) => output.length > 10` |
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Solución de Problemas
|
||||
|
||||
<details>
|
||||
@@ -900,7 +1347,7 @@ El "OmniRoute Golden Set" precargado contiene 10 casos de prueba que cubren:
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Stack Tecnológico
|
||||
## 🛠️
|
||||
|
||||
- **Runtime**: Node.js 20+
|
||||
- **Lenguaje**: TypeScript 5.9 — **100% TypeScript** en `src/` y `open-sse/` (v1.0.6)
|
||||
@@ -931,17 +1378,7 @@ El "OmniRoute Golden Set" precargado contiene 10 casos de prueba que cubren:
|
||||
|
||||
---
|
||||
|
||||
## 📧 Soporte
|
||||
|
||||
> 💬 **¡Únete a la comunidad!** [Grupo WhatsApp](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t) — Obtén ayuda, comparte consejos y mantente al día.
|
||||
|
||||
- **Website**: [omniroute.online](https://omniroute.online)
|
||||
- **GitHub**: [github.com/diegosouzapw/OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
||||
- **Issues**: [github.com/diegosouzapw/OmniRoute/issues](https://github.com/diegosouzapw/OmniRoute/issues)
|
||||
- **WhatsApp**: [Grupo de la Comunidad](https://chat.whatsapp.com/JI7cDQ1GyaiDHhVBpLxf8b?mode=gi_t)
|
||||
- **Proyecto Original**: [9router por decolua](https://github.com/decolua/9router)
|
||||
|
||||
---
|
||||
## 🗺️
|
||||
|
||||
## 👥 Contribuidores
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user