diff --git a/README.md b/README.md
index 91bdf20..a6c5eb1 100644
--- a/README.md
+++ b/README.md
@@ -125,7 +125,35 @@ You're done!
- I've pre-loaded Pyrite on the server. Just set your url to `http://localhost:42069/v1/pyrite`! This is meant for people who JUST installed a front and and don't have a real setup yet - it's nice to be able to celebrate your victory with something working right away!
- Read up on how SillyTavern handles caching: https://docs.sillytavern.app/administration/config-yaml/#claude-configuration
- It's off by default, turn it on with those configs. Choose depth 0 if you aren't sure; this caches the most aggressively.
- - What all those warnings mean is that for cache to be used, the convo history up to a certain point has to be the exact same. ST has a lot of advanced features where it makes changes to the start of the context, ruining your savings. But for simpler use cases, it's fine. Set the context to 200K IMO - as stuff falls out of context if you choose a lower number, that changes the convo start
+ - What all those warnings mean is that for cache to be used, the convo history up to a certain point has to be the exact same. ST has a lot of advanced features where it makes changes to the start of the context, ruining your savings. But for simpler use cases, it's fine. Set the context to 200K IMO - as stuff falls out of context if you choose a lower number, that changes the convo start
+
+### TypingMind Setup
+
+Unlike SillyTavern, TypingMind requires the **full endpoint URL** including `/v1/messages`.
+
+1. In TypingMind, go to **Model** > **Add Custom Model**
+2. Set **AI Type** to `Claude (messages API)`
+3. Set **Endpoint URL** to `http://localhost:42069/v1/messages` (note: full path required)
+4. Set **Model ID** to a valid Claude model, e.g. `claude-opus-4-5-20251101` or `claude-sonnet-4-20250514`
+5. Set **Context Length** to `200000` (same for all Claude models)
+6. Leave **Authentication** set to `None`
+7. (Optional) Add a PHI instruction - see below
+8. Click **Test**, then **Add Model** when the test succeeds
+
+#### Adding PHI (Post History Instruction)
+
+PHI allows you to inject ephemeral instructions that appear after the last user message but aren't stored in conversation history. This is useful for reply rules, formatting preferences, or per-request instructions.
+
+1. Expand **Advanced** > **Custom Body Params**
+2. Click **Add Body Parameter**
+3. Set type to `string`, key to `phi`, and value to your instruction
+
+Example PHI value:
+```
+Always respond concisely. Use markdown formatting.
+```
+
+The proxy strips the `phi` field from the request and injects it as a user message after your last message. Claude sees it, but TypingMind doesn't store it in the conversation history.
### Troubleshooting
@@ -155,6 +183,7 @@ Most likely thing to go wrong is not being able to find the credentials, either
- Remove "ttl" key from any "cache_control" objects, since endpoint does not allow it
- The first section of the system prompt must be "You are Claude Code, Anthropic's official CLI for Claude." or the request will not be accepted by Anthropic (specifically/technically, it must be the first item of the "system" array's "text" content). I am adding this, but this is just FYI so you know it's there and that you have to deal with it
- Optionally filter sampling parameters to avoid conflicts with Sonnet 4.5. Set `filter_sampling_params=true` in `server/config.txt` to enable this feature, which ensures only one sampling parameter is sent to the API. When both `temperature` and `top_p` are specified, it removes whichever is at the default value (1.0), or prefers temperature if both are non-default (Sonnet 4.5 doesn't allow both parameters). Other models work fine with both parameters, so this defaults to off
+- **PHI (Post History Instruction)**: If the request body contains a `phi` (or `PHI`) field, its value is inserted as a separate user message immediately after the last user message, and the field is removed from the request before it is forwarded to Anthropic. This allows clients like TypingMind, which have no native PHI support, to send ephemeral instructions
### Smart Host Binding
- **Native execution**: Binds to `127.0.0.1` (secure, local-only)
diff --git a/server/ClaudeRequest.js b/server/ClaudeRequest.js
index 4a468d8..c8905fd 100644
--- a/server/ClaudeRequest.js
+++ b/server/ClaudeRequest.js
@@ -48,7 +48,7 @@ class ClaudeRequest {
constructor(req = null) {
this.API_URL = 'https://api.anthropic.com/v1/messages';
this.VERSION = '2023-06-01';
- this.BETA_HEADER = 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14';
+ this.BETA_HEADER = 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,web-search-2025-03-05,web-fetch-2025-09-10';
const apiKey = req?.headers?.['x-api-key'];
if (apiKey && apiKey.includes('sk-ant')) {
@@ -225,7 +225,7 @@ class ClaudeRequest {
if (ClaudeRequest.refreshPromise) {
return await ClaudeRequest.refreshPromise;
}
-
+
ClaudeRequest.refreshPromise = this._doRefresh();
try {
const result = await ClaudeRequest.refreshPromise;
@@ -286,20 +286,20 @@ class ClaudeRequest {
req.write(JSON.stringify(refreshData));
req.end();
});
-
+
credentials.claudeAiOauth.accessToken = response.access_token;
credentials.claudeAiOauth.refreshToken = response.refresh_token;
credentials.claudeAiOauth.expiresAt = Date.now() + (response.expires_in * 1000);
-
+
const credentialsJson = JSON.stringify(credentials);
this.writeCredentialsToFile(credentialsJson);
-
+
Logger.info('Token refreshed successfully');
return `Bearer ${response.access_token}`;
-
+
} catch (error) {
if (error.code === 'ENOENT') {
- const errorMsg = process.platform === 'win32'
+ const errorMsg = process.platform === 'win32'
? 'Failed to load credentials: Claude credentials file not found in WSL. Check your default WSL distro with "wsl -l -v" and set the correct one with "wsl --set-default ". As a backup, you can get the token from ~/.claude/.credentials.json and pass it as x-api-key (proxy password in SillyTavern)'
: 'Claude credentials not found. Please ensure Claude Code is installed and you have logged in. As a backup, you can get the token from ~/.claude/.credentials.json and pass it as x-api-key (proxy password in SillyTavern)';
Logger.error('ENOENT error during token refresh:', errorMsg);
@@ -342,7 +342,7 @@ class ClaudeRequest {
if (Array.isArray(body.system)) {
body.system.unshift(systemPrompt);
} else {
- body.system = [systemPrompt, body.system];
+ body.system = [systemPrompt, { type: 'text', text: body.system }];
}
} else {
body.system = [systemPrompt];
@@ -352,6 +352,10 @@ class ClaudeRequest {
this.applyPreset(body, presetName);
}
+ if (body.phi !== undefined || body.PHI !== undefined) {
+ this.injectPHI(body);
+ }
+
body = this.stripTtlFromCacheControl(body);
body = this.filterSamplingParams(body);
@@ -394,7 +398,7 @@ class ClaudeRequest {
// Use suffixEt only when thinking is enabled, otherwise use regular suffix
const hasThinking = body.thinking && body.thinking.type === 'enabled';
const suffix = hasThinking ? preset.suffixEt : preset.suffix;
-
+
if (suffix && body.messages && body.messages.length > 0) {
const lastUserIndex = body.messages.map(m => m.role).lastIndexOf('user');
if (lastUserIndex !== -1) {
@@ -409,6 +413,24 @@ class ClaudeRequest {
Logger.debug(`Applied preset: ${presetName}`);
}
+ injectPHI(body) {
+ const phi = body.phi || body.PHI;
+ delete body.phi; // Always remove before sending to Claude API
+ delete body.PHI;
+
+ if (phi && body.messages && body.messages.length > 0) {
+ const lastUserIndex = body.messages.map(m => m.role).lastIndexOf('user');
+ if (lastUserIndex !== -1) {
+ const phiMsg = {
+ role: 'user',
+ content: [{ type: 'text', text: phi }]
+ };
+ body.messages.splice(lastUserIndex + 1, 0, phiMsg);
+ Logger.debug('Injected PHI message');
+ }
+ }
+ }
+
async makeRequest(body, presetName = null) {
const token = await this.getAuthToken();
const headers = this.getHeaders(token);
@@ -435,7 +457,7 @@ class ClaudeRequest {
req.destroy();
reject(err);
});
-
+
req.write(JSON.stringify(processedBody));
req.end();
});
@@ -444,11 +466,11 @@ class ClaudeRequest {
async handleResponse(res, body, presetName = null) {
try {
const claudeResponse = await this.makeRequest(body, presetName);
-
+
if (claudeResponse.statusCode === 401) {
Logger.info('Got 401, checking credential store');
ClaudeRequest.cachedToken = null;
-
+
try {
const newToken = await this.loadOrRefreshToken();
ClaudeRequest.cachedToken = newToken;
@@ -465,16 +487,16 @@ class ClaudeRequest {
Logger.info('Token load/refresh failed, passing 401 to client');
}
}
-
+
res.statusCode = claudeResponse.statusCode;
Logger.debug(`Claude API status: ${claudeResponse.statusCode}`);
Logger.debug('Claude response headers:', JSON.stringify(claudeResponse.headers, null, 2));
Object.keys(claudeResponse.headers).forEach(key => {
res.setHeader(key, claudeResponse.headers[key]);
});
-
+
this.streamResponse(res, claudeResponse);
-
+
} catch (error) {
console.error('Claude request error:', error.message);
res.writeHead(500, { 'Content-Type': 'application/json' });
@@ -507,7 +529,7 @@ class ClaudeRequest {
const contentType = claudeResponse.headers['content-type'] || '';
if (contentType.includes('text/event-stream')) {
Logger.debug('Outgoing response headers to client:', JSON.stringify(res.getHeaders(), null, 2));
-
+
claudeResponse.on('error', (err) => {
Logger.debug('Claude response stream error:', err);
if (!res.headersSent) {
@@ -517,17 +539,17 @@ class ClaudeRequest {
res.end(JSON.stringify({ error: 'Upstream response error' }));
}
});
-
+
res.on('close', () => {
Logger.debug('Client disconnected, cleaning up streams');
if (!claudeResponse.destroyed) {
claudeResponse.destroy();
}
});
-
+
if (Logger.getLogLevel() >= 3) {
const debugStream = Logger.createDebugStream('Claude SSE', extractClaudeText);
-
+
debugStream.on('error', (err) => {
Logger.debug('Debug stream error:', err);
if (!res.headersSent) {
@@ -537,7 +559,7 @@ class ClaudeRequest {
res.end(JSON.stringify({ error: 'Stream processing error' }));
}
});
-
+
claudeResponse.pipe(debugStream).pipe(res);
debugStream.on('end', () => {
Logger.debug('\n');
diff --git a/server/server.js b/server/server.js
index b4c5bc8..e3ac92c 100644
--- a/server/server.js
+++ b/server/server.js
@@ -127,7 +127,7 @@ async function handleRequest(req, res) {
res.setHeader('Access-Control-Allow-Origin', '*');
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization, X-Requested-With');
+ res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization, X-Requested-With, anthropic-version, anthropic-beta, x-api-key');
if (req.method === 'OPTIONS') {
res.writeHead(200);