diff --git a/README.md b/README.md index 91bdf20..a6c5eb1 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,35 @@ You're done! - I've pre-loaded Pyrite on the server. Just set your url to `http://localhost:42069/v1/pyrite`! This is meant for people who JUST installed a front and and don't have a real setup yet - it's nice to be able to celebrate your victory with something working right away! - Read up on how SillyTavern handles caching: https://docs.sillytavern.app/administration/config-yaml/#claude-configuration - It's off by default, turn it on with those configs. Choose depth 0 if you aren't sure; this caches the most aggressively. - - What all those warnings mean is that for cache to be used, the convo history up to a certain point has to be the exact same. ST has a lot of advanced features where it makes changes to the start of the context, ruining your savings. But for simpler use cases, it's fine. Set the context to 200K IMO - as stuff falls out of context if you choose a lower number, that changes the convo start + - What all those warnings mean is that for cache to be used, the convo history up to a certain point has to be the exact same. ST has a lot of advanced features where it makes changes to the start of the context, ruining your savings. But for simpler use cases, it's fine. Set the context to 200K IMO - as stuff falls out of context if you choose a lower number, that changes the convo start + +### TypingMind Setup + +Unlike SillyTavern, TypingMind requires the **full endpoint URL** including `/v1/messages`. + +1. In TypingMind, go to **Model** > **Add Custom Model** +2. Set **AI Type** to `Claude (messages API)` +3. Set **Endpoint URL** to `http://localhost:42069/v1/messages` (note: full path required) +4. Set **Model ID** to a valid Claude model, e.g. `claude-opus-4-5-20251101` or `claude-sonnet-4-20250514` +5. Set **Context Length** to `200000` (same for all Claude models) +6. Leave **Authentication** set to `None` +7. 
(Optional) Add a PHI instruction - see below +8. Click **Test**, then **Add Model** when the test succeeds + +#### Adding PHI (Post History Instruction) + +PHI allows you to inject ephemeral instructions that appear after the last user message but aren't stored in conversation history. This is useful for reply rules, formatting preferences, or per-request instructions. + +1. Expand **Advanced** > **Custom Body Params** +2. Click **Add Body Parameter** +3. Set type to `string`, key to `phi`, and value to your instruction + +Example PHI value: +``` +Always respond concisely. Use markdown formatting. +``` + +The proxy strips the `phi` field from the request and injects it as a user message after your last message. Claude sees it, but TypingMind doesn't store it in the conversation history. ### Troubleshooting @@ -155,6 +183,7 @@ Most likely thing to go wrong is not being able to find the credentials, either - Remove "ttl" key from any "cache_control" objects, since endpoint does not allow it - The first section of the system prompt must be "You are Claude Code, Anthropic's official CLI for Claude." or the request will not be accepted by Anthropic (specifically/technically, it must be the first item of the "system" array's "text" content). I am adding this, but this is just FYI so you know it's there and that you have to deal with it - Optionally filter sampling parameters to avoid conflicts with Sonnet 4.5. Set `filter_sampling_params=true` in `server/config.txt` to enable this feature, which ensures only one sampling parameter is sent to the API. When both `temperature` and `top_p` are specified, it removes whichever is at the default value (1.0), or prefers temperature if both are non-default (Sonnet 4.5 doesn't allow both parameters). 
Other models work fine with both parameters, so this defaults to off +- **PHI (Post History Instruction)**: If the request body contains a `phi` field, it's injected as a user message after the last user message, then removed from the request. This allows clients like TypingMind to send ephemeral instructions without native PHI support ### Smart Host Binding - **Native execution**: Binds to `127.0.0.1` (secure, local-only) diff --git a/server/ClaudeRequest.js b/server/ClaudeRequest.js index 4a468d8..c8905fd 100644 --- a/server/ClaudeRequest.js +++ b/server/ClaudeRequest.js @@ -48,7 +48,7 @@ class ClaudeRequest { constructor(req = null) { this.API_URL = 'https://api.anthropic.com/v1/messages'; this.VERSION = '2023-06-01'; - this.BETA_HEADER = 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14'; + this.BETA_HEADER = 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,web-search-2025-03-05,web-fetch-2025-09-10'; const apiKey = req?.headers?.['x-api-key']; if (apiKey && apiKey.includes('sk-ant')) { @@ -225,7 +225,7 @@ class ClaudeRequest { if (ClaudeRequest.refreshPromise) { return await ClaudeRequest.refreshPromise; } - + ClaudeRequest.refreshPromise = this._doRefresh(); try { const result = await ClaudeRequest.refreshPromise; @@ -286,20 +286,20 @@ class ClaudeRequest { req.write(JSON.stringify(refreshData)); req.end(); }); - + credentials.claudeAiOauth.accessToken = response.access_token; credentials.claudeAiOauth.refreshToken = response.refresh_token; credentials.claudeAiOauth.expiresAt = Date.now() + (response.expires_in * 1000); - + const credentialsJson = JSON.stringify(credentials); this.writeCredentialsToFile(credentialsJson); - + Logger.info('Token refreshed successfully'); return `Bearer ${response.access_token}`; - + } catch (error) { if (error.code === 'ENOENT') { - const errorMsg = process.platform === 'win32' + const errorMsg = 
process.platform === 'win32' ? 'Failed to load credentials: Claude credentials file not found in WSL. Check your default WSL distro with "wsl -l -v" and set the correct one with "wsl --set-default ". As a backup, you can get the token from ~/.claude/.credentials.json and pass it as x-api-key (proxy password in SillyTavern)' : 'Claude credentials not found. Please ensure Claude Code is installed and you have logged in. As a backup, you can get the token from ~/.claude/.credentials.json and pass it as x-api-key (proxy password in SillyTavern)'; Logger.error('ENOENT error during token refresh:', errorMsg); @@ -342,7 +342,7 @@ class ClaudeRequest { if (Array.isArray(body.system)) { body.system.unshift(systemPrompt); } else { - body.system = [systemPrompt, body.system]; + body.system = [systemPrompt, { type: 'text', text: body.system }]; } } else { body.system = [systemPrompt]; @@ -352,6 +352,10 @@ class ClaudeRequest { this.applyPreset(body, presetName); } + if (body.phi !== undefined || body.PHI !== undefined) { + this.injectPHI(body); + } + body = this.stripTtlFromCacheControl(body); body = this.filterSamplingParams(body); @@ -394,7 +398,7 @@ class ClaudeRequest { // Use suffixEt only when thinking is enabled, otherwise use regular suffix const hasThinking = body.thinking && body.thinking.type === 'enabled'; const suffix = hasThinking ? 
preset.suffixEt : preset.suffix; - + if (suffix && body.messages && body.messages.length > 0) { const lastUserIndex = body.messages.map(m => m.role).lastIndexOf('user'); if (lastUserIndex !== -1) { @@ -409,6 +413,24 @@ class ClaudeRequest { Logger.debug(`Applied preset: ${presetName}`); } + injectPHI(body) { + const phi = body.phi || body.PHI; + delete body.phi; // Always remove before sending to Claude API + delete body.PHI; + + if (phi && body.messages && body.messages.length > 0) { + const lastUserIndex = body.messages.map(m => m.role).lastIndexOf('user'); + if (lastUserIndex !== -1) { + const phiMsg = { + role: 'user', + content: [{ type: 'text', text: phi }] + }; + body.messages.splice(lastUserIndex + 1, 0, phiMsg); + Logger.debug('Injected PHI message'); + } + } + } + async makeRequest(body, presetName = null) { const token = await this.getAuthToken(); const headers = this.getHeaders(token); @@ -435,7 +457,7 @@ class ClaudeRequest { req.destroy(); reject(err); }); - + req.write(JSON.stringify(processedBody)); req.end(); }); @@ -444,11 +466,11 @@ class ClaudeRequest { async handleResponse(res, body, presetName = null) { try { const claudeResponse = await this.makeRequest(body, presetName); - + if (claudeResponse.statusCode === 401) { Logger.info('Got 401, checking credential store'); ClaudeRequest.cachedToken = null; - + try { const newToken = await this.loadOrRefreshToken(); ClaudeRequest.cachedToken = newToken; @@ -465,16 +487,16 @@ class ClaudeRequest { Logger.info('Token load/refresh failed, passing 401 to client'); } } - + res.statusCode = claudeResponse.statusCode; Logger.debug(`Claude API status: ${claudeResponse.statusCode}`); Logger.debug('Claude response headers:', JSON.stringify(claudeResponse.headers, null, 2)); Object.keys(claudeResponse.headers).forEach(key => { res.setHeader(key, claudeResponse.headers[key]); }); - + this.streamResponse(res, claudeResponse); - + } catch (error) { console.error('Claude request error:', error.message); 
res.writeHead(500, { 'Content-Type': 'application/json' }); @@ -507,7 +529,7 @@ class ClaudeRequest { const contentType = claudeResponse.headers['content-type'] || ''; if (contentType.includes('text/event-stream')) { Logger.debug('Outgoing response headers to client:', JSON.stringify(res.getHeaders(), null, 2)); - + claudeResponse.on('error', (err) => { Logger.debug('Claude response stream error:', err); if (!res.headersSent) { @@ -517,17 +539,17 @@ class ClaudeRequest { res.end(JSON.stringify({ error: 'Upstream response error' })); } }); - + res.on('close', () => { Logger.debug('Client disconnected, cleaning up streams'); if (!claudeResponse.destroyed) { claudeResponse.destroy(); } }); - + if (Logger.getLogLevel() >= 3) { const debugStream = Logger.createDebugStream('Claude SSE', extractClaudeText); - + debugStream.on('error', (err) => { Logger.debug('Debug stream error:', err); if (!res.headersSent) { @@ -537,7 +559,7 @@ class ClaudeRequest { res.end(JSON.stringify({ error: 'Stream processing error' })); } }); - + claudeResponse.pipe(debugStream).pipe(res); debugStream.on('end', () => { Logger.debug('\n'); diff --git a/server/server.js b/server/server.js index b4c5bc8..e3ac92c 100644 --- a/server/server.js +++ b/server/server.js @@ -127,7 +127,7 @@ async function handleRequest(req, res) { res.setHeader('Access-Control-Allow-Origin', '*'); res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS'); - res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization, X-Requested-With'); + res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization, X-Requested-With, anthropic-version, anthropic-beta, x-api-key'); if (req.method === 'OPTIONS') { res.writeHead(200);