feat(memory): optimize RAM usage for low-memory deployments

- Add Node.js heap limit (256MB default) via OMNIROUTE_MEMORY_MB
- Convert LRU caches to byte-based limits (2-4MB)
- Reduce in-memory buffer sizes: 500→200 entries
- Add .env file loading in CLI for global npm installs
- Add PM2 ecosystem.config.js example for deployment
- Document memory tuning env vars in USER_GUIDE.md

Estimated RAM reduction: ~60-70% (from ~512MB+ to ~150-200MB)
This commit is contained in:
benzntech
2026-02-23 14:16:47 +05:30
parent 5668e16fbf
commit 5a2fdacebe
9 changed files with 238 additions and 57 deletions
+21
View File
@@ -153,3 +153,24 @@ LOG_TO_FILE=true
# LOG_FILE_PATH=logs/application/app.log
# LOG_MAX_FILE_SIZE=50M
# LOG_RETENTION_DAYS=7
# ─────────────────────────────────────────────────────────────────────────────
# Memory Optimization (Low-RAM configurations)
# ─────────────────────────────────────────────────────────────────────────────
# Node.js heap limit in MB (default: 256 for Docker, system default for npm)
# OMNIROUTE_MEMORY_MB=256
# Prompt cache settings
# PROMPT_CACHE_MAX_SIZE=50
# PROMPT_CACHE_MAX_BYTES=2097152
# PROMPT_CACHE_TTL_MS=300000
# Semantic cache settings (temperature=0 responses)
# SEMANTIC_CACHE_MAX_SIZE=100
# SEMANTIC_CACHE_MAX_BYTES=4194304
# SEMANTIC_CACHE_TTL_MS=1800000
# In-memory log buffers
# PROXY_LOG_MAX_ENTRIES=200
# CALL_LOGS_MAX=200
# STREAM_HISTORY_MAX=50
+1
View File
@@ -19,6 +19,7 @@ LABEL org.opencontainers.image.title="omniroute" \
ENV NODE_ENV=production
ENV PORT=20128
ENV HOSTNAME=0.0.0.0
ENV NODE_OPTIONS="--max-old-space-size=256"
# Data directory inside Docker — must match the volume mount in docker-compose.yml
ENV DATA_DIR=/app/data
+68 -7
View File
@@ -12,15 +12,69 @@
*/
import { spawn } from "node:child_process";
import { existsSync } from "node:fs";
import { existsSync, readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { homedir, platform } from "node:os";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const ROOT = join(__dirname, "..");
const APP_DIR = join(ROOT, "app");
// ── Load .env file (for global npm install) ─────────────────
/**
 * Load environment variables from the first `.env` file that exists.
 *
 * Candidate locations, in priority order:
 *   1. `$DATA_DIR/.env` (only when DATA_DIR is set)
 *   2. `~/.omniroute/.env`, or `%APPDATA%\omniroute\.env` on Windows
 *   3. `./.env` in the current working directory
 *
 * Only the first file found is applied; later candidates are not merged.
 * Variables already present in `process.env` are never overridden.
 *
 * Supported line syntax: `KEY=value`, `export KEY=value`, `# comments`,
 * and values wrapped in one matching pair of single or double quotes.
 */
function loadEnvFile() {
  const envPaths = [];

  // 1. DATA_DIR/.env if set
  if (process.env.DATA_DIR) {
    envPaths.push(join(process.env.DATA_DIR, ".env"));
  }

  // 2. Default data dir under the user's home
  const home = homedir();
  if (home) {
    if (platform() === "win32") {
      const appData = process.env.APPDATA || join(home, "AppData", "Roaming");
      envPaths.push(join(appData, "omniroute", ".env"));
    } else {
      envPaths.push(join(home, ".omniroute", ".env"));
    }
  }

  // 3. ./.env (current working directory)
  envPaths.push(join(process.cwd(), ".env"));

  for (const envPath of envPaths) {
    try {
      if (!existsSync(envPath)) continue;

      const content = readFileSync(envPath, "utf-8");
      for (const rawLine of content.split(/\r?\n/)) {
        let line = rawLine.trim();
        // Skip empty lines and comments
        if (!line || line.startsWith("#")) continue;

        // Accept shell-style `export KEY=value` lines.
        line = line.replace(/^export\s+/, "");

        const eqIdx = line.indexOf("=");
        if (eqIdx <= 0) continue;

        const key = line.slice(0, eqIdx).trim();
        // Don't override existing env vars
        if (!key || process.env[key] !== undefined) continue;

        let value = line.slice(eqIdx + 1).trim();
        // Strip quotes only when the value is wrapped in one MATCHING pair,
        // so a stray or unbalanced quote inside a secret is left intact.
        const quoted = /^(["'])(.*)\1$/.exec(value);
        if (quoted) value = quoted[2];

        process.env[key] = value;
      }
      console.log(`  \x1b[2m📋 Loaded env from ${envPath}\x1b[0m`);
      return;
    } catch (err) {
      // Best effort: report the unreadable file and fall through to the next candidate.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`  \x1b[2m⚠ Could not read env file ${envPath}: ${reason}\x1b[0m`);
    }
  }
}
loadEnvFile();
// ── Parse args ─────────────────────────────────────────────
const args = process.argv.slice(2);
@@ -35,6 +89,10 @@ if (args.includes("--help") || args.includes("-h")) {
omniroute --help Show this help
omniroute --version Show version
\x1b[1mConfig:\x1b[0m
Loads .env from: ~/.omniroute/.env or ./.env
Memory limit: OMNIROUTE_MEMORY_MB (default: 256)
\x1b[1mAfter starting:\x1b[0m
Dashboard: http://localhost:<port>
API: http://localhost:<port>/v1
@@ -74,11 +132,11 @@ const noOpen = args.includes("--no-open");
// ── Banner ─────────────────────────────────────────────────
console.log(`
\x1b[36m ____ _ ____ _
/ __ \\ (_) __ \\ | |
| | | |_ __ ___ _ __ _| |__) |___ _ _| |_ ___
| | | | '_ \` _ \\| '_ \\ | _ // _ \\| | | | __/ _ \\
| |__| | | | | | | | | | | | \\ \\ (_) | |_| | || __/
\\____/|_| |_| |_|_| |_|_|_| \\_\\___/ \\__,_|\\__\\___|
/ __ \\ (_) __ \\ | |
| | | |_ __ ___ _ __ _| |__) |___ _ _| |_ ___
| | | | '_ \` _ \\| '_ \\ | _ // _ \\| | | | __/ _ \\
| |__| | | | | | | | | | | | \\ \\ (_) | |_| | || __/
\\____/|_| |_| |_|_| |_|_|_| \\_\\___/ \\__,_|\\__\\___|
\x1b[0m`);
// ── Node.js version check ──────────────────────────────────
@@ -108,14 +166,17 @@ if (!existsSync(serverJs)) {
// ── Start server ───────────────────────────────────────────
console.log(` \x1b[2m⏳ Starting server...\x1b[0m\n`);
// Heap limit for the server process, in MB. Falls back to 256 when
// OMNIROUTE_MEMORY_MB is unset or does not parse to a positive integer, so a
// typo cannot produce an invalid --max-old-space-size flag.
const requestedMemoryMb = Number.parseInt(process.env.OMNIROUTE_MEMORY_MB ?? "", 10);
const memoryLimit = String(
  Number.isInteger(requestedMemoryMb) && requestedMemoryMb > 0 ? requestedMemoryMb : 256
);
const heapLimitFlag = `--max-old-space-size=${memoryLimit}`;

const env = {
  ...process.env,
  PORT: String(port),
  HOSTNAME: "0.0.0.0",
  NODE_ENV: "production",
  // Append instead of replace so flags the user already put in NODE_OPTIONS
  // (e.g. --enable-source-maps) are still passed on to the server.
  NODE_OPTIONS: [process.env.NODE_OPTIONS, heapLimitFlag].filter(Boolean).join(" "),
};
const server = spawn("node", [serverJs], {
const server = spawn("node", [`--max-old-space-size=${memoryLimit}`, serverJs], {
cwd: APP_DIR,
env,
stdio: "pipe",
+71 -14
View File
@@ -316,6 +316,25 @@ Model: cc/claude-opus-4-6
## 🚀 Deployment
### Global npm install (Recommended)
```bash
npm install -g omniroute
# Create config directory
mkdir -p ~/.omniroute
# Create .env file (see .env.example)
cp .env.example ~/.omniroute/.env
# Start server
omniroute
# Or with custom port:
omniroute --port 3000
```
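The CLI automatically loads the first `.env` file it finds, checking `$DATA_DIR/.env` (when `DATA_DIR` is set), then `~/.omniroute/.env` (`%APPDATA%\omniroute\.env` on Windows), then `./.env`. Variables already set in the environment are not overridden.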
### VPS Deployment
```bash
@@ -335,6 +354,41 @@ npm run start
# Or: pm2 start npm --name omniroute -- start
```
### PM2 Deployment (Low Memory)
For servers with limited RAM, use the memory limit option:
```bash
# With 256MB limit (default)
pm2 start npm --name omniroute -- start
# Or with custom memory limit
OMNIROUTE_MEMORY_MB=512 pm2 start npm --name omniroute -- start
# Or using ecosystem.config.js
pm2 start ecosystem.config.js
```
Create `ecosystem.config.js`:
```javascript
module.exports = {
apps: [{
name: 'omniroute',
script: 'npm',
args: 'start',
env: {
NODE_ENV: 'production',
OMNIROUTE_MEMORY_MB: '256',
JWT_SECRET: 'your-secret',
INITIAL_PASSWORD: 'your-password',
},
node_args: '--max-old-space-size=256',
max_memory_restart: '300M',
}]
};
```
### Docker
```bash
@@ -349,20 +403,23 @@ For host-integrated mode with CLI binaries, see the Docker section in the main d
### Environment Variables
| Variable | Default | Description |
| --------------------- | ------------------------------------ | ------------------------------------------------------- |
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
| `INITIAL_PASSWORD` | `123456` | First login password |
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
| `PORT` | framework default | Service port (`20128` in examples) |
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
| `NODE_ENV` | runtime default | Set `production` for deploy |
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
| Variable | Default | Description |
| ----------------------- | ------------------------------------ | ------------------------------------------------------- |
| `JWT_SECRET` | `omniroute-default-secret-change-me` | JWT signing secret (**change in production**) |
| `INITIAL_PASSWORD` | `123456` | First login password |
| `DATA_DIR` | `~/.omniroute` | Data directory (db, usage, logs) |
| `PORT` | framework default | Service port (`20128` in examples) |
| `HOSTNAME` | framework default | Bind host (Docker defaults to `0.0.0.0`) |
| `NODE_ENV` | runtime default | Set `production` for deploy |
| `BASE_URL` | `http://localhost:20128` | Server-side internal base URL |
| `CLOUD_URL` | `https://omniroute.dev` | Cloud sync endpoint base URL |
| `API_KEY_SECRET` | `endpoint-proxy-api-key-secret` | HMAC secret for generated API keys |
| `REQUIRE_API_KEY` | `false` | Enforce Bearer API key on `/v1/*` |
| `ENABLE_REQUEST_LOGS` | `false` | Enables request/response logs |
| `AUTH_COOKIE_SECURE` | `false` | Force `Secure` auth cookie (behind HTTPS reverse proxy) |
| `OMNIROUTE_MEMORY_MB` | `256` | Node.js heap limit in MB |
| `PROMPT_CACHE_MAX_SIZE` | `50` | Max prompt cache entries |
| `SEMANTIC_CACHE_MAX_SIZE` | `100` | Max semantic cache entries |
For the full environment variable reference, see the [README](../README.md).
+70 -30
View File
@@ -3,6 +3,7 @@
*
* In-memory LRU cache for LLM prompt/response pairs.
* Uses content hashing for cache keys to handle semantic deduplication.
* Memory-optimized with byte-based limits.
*
* @module lib/cacheLayer
*/
@@ -19,22 +20,30 @@ import crypto from "node:crypto";
* @property {number} hits - Number of times this entry was accessed
*/
const DEFAULT_MAX_ENTRIES = 50;
const DEFAULT_MAX_BYTES = 2 * 1024 * 1024;
const DEFAULT_TTL = 300000;
export class LRUCache {
/** @type {Map<string, CacheEntry>} */
#cache = new Map();
#maxSize;
#maxBytes;
#defaultTTL;
#currentSize = 0;
#currentBytes = 0;
#stats = { hits: 0, misses: 0, evictions: 0 };
/**
* @param {Object} options
* @param {number} [options.maxSize=100] - Max number of entries
* @param {number} [options.maxSize=50] - Max number of entries (reduced for memory)
* @param {number} [options.maxBytes=2097152] - Max bytes (default: 2MB)
* @param {number} [options.defaultTTL=300000] - Default TTL in ms (5 min)
*/
constructor(options: { maxSize?: number; maxBytes?: number; defaultTTL?: number } = {}) {
  // A NaN limit (e.g. from parseInt() on a malformed env var) makes every
  // eviction and TTL comparison evaluate to false, which would silently turn
  // the cache into an unbounded one. Missing, NaN, or negative values
  // therefore fall back to the module defaults.
  const limitOr = (candidate: number | undefined, fallback: number): number =>
    typeof candidate === "number" && candidate >= 0 ? candidate : fallback;

  this.#maxSize = limitOr(options.maxSize, DEFAULT_MAX_ENTRIES);
  this.#maxBytes = limitOr(options.maxBytes, DEFAULT_MAX_BYTES);
  this.#defaultTTL = limitOr(options.defaultTTL, DEFAULT_TTL);
}
/**
@@ -42,7 +51,7 @@ export class LRUCache {
* @param {Object} params - Parameters to hash
* @returns {string} Cache key
*/
static generateKey(params: Record<string, unknown>) {
  // Build a key-sorted copy at EVERY nesting level before hashing.
  //
  // Passing the top-level key list as the JSON.stringify replacer does not
  // work here: an array replacer is an allow-list applied to every nested
  // object as well, so nested properties whose names are not also top-level
  // keys (e.g. `role` / `content` inside `messages`) are dropped, and
  // different prompts end up with the same cache key.
  const canonicalize = (value: unknown): unknown => {
    if (value === null || typeof value !== "object") return value;

    // Honour toJSON (e.g. Date) the same way JSON.stringify would.
    const serializable = value as { toJSON?: () => unknown };
    if (typeof serializable.toJSON === "function") {
      return canonicalize(serializable.toJSON());
    }

    if (Array.isArray(value)) return value.map(canonicalize);

    const source = value as Record<string, unknown>;
    // Null prototype so a literal "__proto__" key is stored as plain data.
    const sorted: Record<string, unknown> = Object.create(null);
    for (const name of Object.keys(source).sort()) {
      sorted[name] = canonicalize(source[name]);
    }
    return sorted;
  };

  const normalized = JSON.stringify(canonicalize(params));
  // 16 hex chars = first 64 bits of the SHA-256 digest.
  return crypto.createHash("sha256").update(normalized).digest("hex").slice(0, 16);
}
@@ -52,7 +61,7 @@ export class LRUCache {
* @param {string} key
* @returns {*|undefined}
*/
get(key) {
get(key: string) {
const entry = this.#cache.get(key);
if (!entry) {
@@ -60,15 +69,12 @@ export class LRUCache {
return undefined;
}
// Check TTL
if (Date.now() - entry.createdAt > entry.ttl) {
this.#cache.delete(key);
this.#currentSize--;
this.#deleteEntry(key, entry);
this.#stats.misses++;
return undefined;
}
// Move to end (most recently used)
this.#cache.delete(key);
entry.hits++;
this.#cache.set(key, entry);
@@ -83,18 +89,25 @@ export class LRUCache {
* @param {*} value
* @param {number} [ttl] - Override default TTL
*/
set(key, value, ttl) {
// If key exists, delete it first (will be re-added at end)
set(key: string, value: unknown, ttl?: number) {
const entrySize = this.#estimateSize(value);
if (this.#cache.has(key)) {
this.#cache.delete(key);
const oldEntry = this.#cache.get(key)!;
this.#currentBytes -= oldEntry.size || 0;
this.#currentSize--;
this.#cache.delete(key);
}
// Evict oldest entries if at capacity
while (this.#currentSize >= this.#maxSize) {
while (
(this.#currentSize >= this.#maxSize || this.#currentBytes + entrySize > this.#maxBytes) &&
this.#cache.size > 0
) {
const oldestKey = this.#cache.keys().next().value;
this.#cache.delete(oldestKey);
this.#currentSize--;
const oldestEntry = this.#cache.get(oldestKey);
if (oldestEntry) {
this.#deleteEntry(oldestKey, oldestEntry);
}
this.#stats.evictions++;
}
@@ -103,12 +116,34 @@ export class LRUCache {
value,
createdAt: Date.now(),
ttl: ttl ?? this.#defaultTTL,
size: JSON.stringify(value).length,
size: entrySize,
hits: 0,
};
this.#cache.set(key, entry);
this.#currentSize++;
this.#currentBytes += entrySize;
}
/**
 * Approximate the memory footprint of `value` in bytes.
 *
 * The estimate is twice the length of the value's JSON text. When the value
 * cannot be serialised (or serialises to nothing), a flat 1024 is returned.
 */
#estimateSize(value: unknown): number {
  const FALLBACK_BYTES = 1024;
  try {
    const json = JSON.stringify(value);
    return 2 * json.length;
  } catch {
    return FALLBACK_BYTES;
  }
}
/**
 * Remove `key` from the backing map and bring both counters back in line:
 * the entry count drops by one and the byte total drops by the entry's
 * recorded size, never going below zero.
 */
#deleteEntry(key: string, entry: { size?: number }) {
  const freedBytes = entry.size || 0;
  this.#cache.delete(key);
  this.#currentSize -= 1;

  const remaining = this.#currentBytes - freedBytes;
  this.#currentBytes = remaining < 0 ? 0 : remaining;
}
/**
@@ -116,12 +151,11 @@ export class LRUCache {
* @param {string} key
* @returns {boolean}
*/
has(key: string) {
  const found = this.#cache.get(key);
  if (!found) return false;

  // An entry past its TTL is purged on sight and reported as absent.
  const expired = Date.now() - found.createdAt > found.ttl;
  if (expired) {
    this.#deleteEntry(key, found);
  }
  return !expired;
}
@@ -132,10 +166,10 @@ export class LRUCache {
* @param {string} key
* @returns {boolean}
*/
delete(key: string) {
  const existing = this.#cache.get(key);
  if (!existing) return false;

  // Go through #deleteEntry so the entry and byte counters stay accurate.
  this.#deleteEntry(key, existing);
  return true;
}
@@ -145,14 +179,17 @@ export class LRUCache {
clear() {
  // Drop every entry and zero both usage counters; hit/miss stats are left untouched.
  this.#currentBytes = 0;
  this.#currentSize = 0;
  this.#cache.clear();
}
/** @returns {{ size: number, maxSize: number, hits: number, misses: number, evictions: number, hitRate: number }} */
/** @returns {{ size: number, maxSize: number, bytes: number, maxBytes: number, hits: number, misses: number, evictions: number, hitRate: number }} */
getStats() {
const total = this.#stats.hits + this.#stats.misses;
return {
size: this.#currentSize,
maxSize: this.#maxSize,
bytes: this.#currentBytes,
maxBytes: this.#maxBytes,
...this.#stats,
hitRate: total > 0 ? (this.#stats.hits / total) * 100 : 0,
};
@@ -161,18 +198,21 @@ export class LRUCache {
// ─── Prompt Cache Singleton ─────────────────
let promptCache;
let promptCache: LRUCache | null = null;
/**
* Get the global prompt cache instance.
* @param {Object} [options]
* @returns {LRUCache}
*/
export function getPromptCache(options?: any) {
export function getPromptCache(
options?: { maxSize?: number; maxBytes?: number; defaultTTL?: number } & Record<string, unknown>
) {
if (!promptCache) {
promptCache = new LRUCache({
maxSize: parseInt(process.env.PROMPT_CACHE_MAX_SIZE || "200", 10),
defaultTTL: parseInt(process.env.PROMPT_CACHE_TTL_MS || "600000", 10),
maxSize: parseInt(process.env.PROMPT_CACHE_MAX_SIZE || "50", 10),
maxBytes: parseInt(process.env.PROMPT_CACHE_MAX_BYTES || String(2 * 1024 * 1024), 10),
defaultTTL: parseInt(process.env.PROMPT_CACHE_TTL_MS || "300000", 10),
...options,
});
}
+1 -1
View File
@@ -11,7 +11,7 @@ import { getDbInstance, isCloud, isBuildPhase } from "./db/core";
const shouldPersistToDisk = !isCloud && !isBuildPhase;
// Entry limit read from PROXY_LOG_MAX_ENTRIES (default 200). A malformed or
// negative value falls back to the default: parseInt() would otherwise yield
// NaN, and presumably any `length > MAX_ENTRIES` trim check would then never fire.
const MAX_ENTRIES = (() => {
  const parsed = Number.parseInt(process.env.PROXY_LOG_MAX_ENTRIES || "200", 10);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : 200;
})();
interface ProxyInfo {
type: string;
+4 -3
View File
@@ -16,14 +16,15 @@ import { getDbInstance } from "./db/core";
// ─── Singleton ─────────────────
let memoryCache;
let memoryCache: LRUCache | null = null;
let stats = { hits: 0, misses: 0, tokensSaved: 0 };
function getMemoryCache() {
if (!memoryCache) {
memoryCache = new LRUCache({
maxSize: parseInt(process.env.SEMANTIC_CACHE_MAX_SIZE || "500", 10),
defaultTTL: parseInt(process.env.SEMANTIC_CACHE_TTL_MS || "3600000", 10), // 1h
maxSize: parseInt(process.env.SEMANTIC_CACHE_MAX_SIZE || "100", 10),
maxBytes: parseInt(process.env.SEMANTIC_CACHE_MAX_BYTES || String(4 * 1024 * 1024), 10),
defaultTTL: parseInt(process.env.SEMANTIC_CACHE_TTL_MS || "1800000", 10),
});
}
return memoryCache;
+1 -1
View File
@@ -12,7 +12,7 @@ import fs from "fs";
import { getDbInstance } from "../db/core";
import { shouldPersistToDisk, CALL_LOGS_DIR } from "./migrations";
// Entry limit read from CALL_LOGS_MAX (default 200). A malformed or negative
// value falls back to the default instead of becoming NaN, which presumably
// would defeat any size comparison made against this limit.
const CALL_LOGS_MAX = (() => {
  const parsed = Number.parseInt(process.env.CALL_LOGS_MAX || "200", 10);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : 200;
})();
const LOG_RETENTION_DAYS = parseInt(process.env.LOG_RETENTION_DAYS || "7", 10);
/** Fields that should always be redacted from logged payloads */
+1 -1
View File
@@ -173,7 +173,7 @@ export class StreamTracker {
// ─── Active Stream Registry ─────────────────
const activeStreams = new Map<string, StreamTracker>();
// History limit read from STREAM_HISTORY_MAX (default 50). A malformed or
// negative value falls back to the default instead of becoming NaN, which
// presumably would defeat any length comparison made against this limit.
const MAX_COMPLETED_HISTORY = (() => {
  const parsed = Number.parseInt(process.env.STREAM_HISTORY_MAX || "50", 10);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : 50;
})();
const completedStreams: ReturnType<StreamTracker["getSummary"]>[] = [];
/**