Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions crates/atomic-core/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1530,7 +1530,16 @@ where
};

// Build message history for API
let mut api_messages = vec![Message::system(get_system_prompt(&scope_description))];
let custom_chat_prefix = settings_map
.get("chat_prompt")
.filter(|s| !s.is_empty())
.map(|s| s.as_str());
let base_system = get_system_prompt(&scope_description);
let system_prompt = match custom_chat_prefix {
Some(prefix) => format!("{prefix}\n\n{base_system}"),
None => base_system,
};
let mut api_messages = vec![Message::system(system_prompt)];
api_messages.extend(messages);

// Truncate to fit context window for providers with limited context
Expand Down Expand Up @@ -1734,7 +1743,15 @@ where
};

// Build message history for API, with canvas context appended to system prompt
let mut system_prompt = get_system_prompt(&scope_description);
let custom_chat_prefix = settings_map
.get("chat_prompt")
.filter(|s| !s.is_empty())
.map(|s| s.as_str());
let base_system = get_system_prompt(&scope_description);
let mut system_prompt = match custom_chat_prefix {
Some(prefix) => format!("{prefix}\n\n{base_system}"),
None => base_system,
};
if page_context.is_some() {
system_prompt.push_str(get_page_context_system_prompt());
}
Expand Down
13 changes: 9 additions & 4 deletions crates/atomic-core/src/briefing/agentic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ struct AgentState {
done_called: bool,
}

async fn resolve_model(core: &AtomicCore) -> Result<(ProviderConfig, String), String> {
async fn resolve_model(core: &AtomicCore) -> Result<(ProviderConfig, String, Option<String>), String> {
let settings = core
.get_settings()
.await
Expand All @@ -336,7 +336,11 @@ async fn resolve_model(core: &AtomicCore) -> Result<(ProviderConfig, String), St
.cloned()
.unwrap_or_else(|| "anthropic/claude-sonnet-4.6".to_string()),
};
Ok((config, model))
let custom_prompt = settings
.get("briefing_prompt")
.filter(|s| !s.is_empty())
.cloned();
Ok((config, model, custom_prompt))
}

async fn run_research(
Expand Down Expand Up @@ -501,14 +505,15 @@ pub(crate) async fn generate(
new_atoms: &[AtomWithTags],
total_new: i32,
) -> Result<(String, Vec<(i32, String, String)>), String> {
let (provider_config, model) = resolve_model(core).await?;
let (provider_config, model, custom_system_prompt) = resolve_model(core).await?;
tracing::info!(model = %model, atoms = new_atoms.len(), "[briefing/agentic] Running agent");

let user_prompt = build_user_prompt(since, new_atoms, total_new);

let system = custom_system_prompt.as_deref().unwrap_or(SYSTEM_PROMPT);
let mut state = AgentState {
messages: vec![
Message::system(SYSTEM_PROMPT.to_string()),
Message::system(system.to_string()),
Message::user(user_prompt),
],
done_called: false,
Expand Down
6 changes: 4 additions & 2 deletions crates/atomic-core/src/chat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub fn get_conversation_tags(
conversation_id: &str,
) -> Result<Vec<Tag>, AtomicCoreError> {
let mut stmt = conn.prepare(
"SELECT t.id, t.name, t.parent_id, t.created_at, t.is_autotag_target
"SELECT t.id, t.name, t.parent_id, t.created_at, t.is_autotag_target, t.autotag_description
FROM tags t
JOIN conversation_tags ct ON ct.tag_id = t.id
WHERE ct.conversation_id = ?1
Expand All @@ -47,6 +47,7 @@ pub fn get_conversation_tags(
parent_id: row.get(2)?,
created_at: row.get(3)?,
is_autotag_target: row.get::<_, i32>(4)? != 0,
autotag_description: row.get(5)?,
})
})?
.collect::<Result<Vec<_>, _>>()?;
Expand Down Expand Up @@ -292,7 +293,7 @@ fn batch_fetch_conversation_tags(
}
let placeholders = conv_ids.iter().map(|_| "?").collect::<Vec<_>>().join(",");
let query = format!(
"SELECT ct.conversation_id, t.id, t.name, t.parent_id, t.created_at, t.is_autotag_target
"SELECT ct.conversation_id, t.id, t.name, t.parent_id, t.created_at, t.is_autotag_target, t.autotag_description
FROM conversation_tags ct
JOIN tags t ON ct.tag_id = t.id
WHERE ct.conversation_id IN ({})
Expand All @@ -310,6 +311,7 @@ fn batch_fetch_conversation_tags(
parent_id: row.get(3)?,
created_at: row.get(4)?,
is_autotag_target: row.get::<_, i32>(5)? != 0,
autotag_description: row.get(6)?,
},
))
})?;
Expand Down
21 changes: 20 additions & 1 deletion crates/atomic-core/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ impl Database {
/// 1. Add a new `if version < N` block at the end (before the virtual-table section)
/// 2. End the block with `PRAGMA user_version = N;`
/// 3. Bump LATEST_VERSION
const LATEST_VERSION: i32 = 15;
const LATEST_VERSION: i32 = 16;

pub fn run_migrations(conn: &Connection) -> Result<(), AtomicCoreError> {
Self::run_migrations_internal(conn, false)
Expand Down Expand Up @@ -797,6 +797,25 @@ impl Database {
conn.execute_batch("PRAGMA user_version = 15;")?;
}

// --- V15 → V16: Per-target auto-tag guidance ---
if version < 16 {
let has_col: bool = conn
.query_row(
"SELECT 1 FROM pragma_table_info('tags') WHERE name='autotag_description'",
[],
|_| Ok(true),
)
.unwrap_or(false);

if !has_col {
conn.execute_batch(
"ALTER TABLE tags ADD COLUMN autotag_description TEXT NOT NULL DEFAULT '';",
)?;
}

conn.execute_batch("PRAGMA user_version = 16;")?;
}

// --- Triggers (recreated every startup to stay current) ---
conn.execute_batch(
"DROP TRIGGER IF EXISTS atom_tags_insert_count;
Expand Down
9 changes: 8 additions & 1 deletion crates/atomic-core/src/embedding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -792,13 +792,18 @@ async fn process_tagging_only_inner(
return Ok(TaggingOutcome::Skipped);
}

let custom_tagging_prompt = settings_map
.get("tagging_prompt")
.filter(|s| !s.is_empty())
.map(|s| s.as_str());
let tags = run_tagging_strategy(
tagging_strategy,
&provider_config,
&content,
&tag_tree_json,
&tagging_model,
supported_params,
custom_tagging_prompt,
)
.await?;

Expand Down Expand Up @@ -852,14 +857,14 @@ async fn process_tagging_only_inner(
new_tags_created: all_new_tag_ids,
})
}

async fn run_tagging_strategy(
strategy: TaggingStrategy,
provider_config: &ProviderConfig,
content: &str,
tag_tree_json: &str,
model: &str,
supported_params: Option<Vec<String>>,
custom_system_prompt: Option<&str>,
) -> Result<Vec<crate::extraction::TagApplication>, String> {
match strategy {
TaggingStrategy::TruncatedFullContent => {
Expand All @@ -869,6 +874,7 @@ async fn run_tagging_strategy(
tag_tree_json,
model,
supported_params,
custom_system_prompt,
)
.await
}
Expand All @@ -882,6 +888,7 @@ async fn run_tagging_strategy(
tag_tree_json,
model,
supported_params,
custom_system_prompt,
)
.await
}
Expand Down
100 changes: 93 additions & 7 deletions crates/atomic-core/src/extraction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,41 @@ Guidelines:
- Every tag must have a valid parent_name from the top-level categories listed below
- If none of the categories below feel like a natural fit for the content, return an empty tag list rather than forcing a poor match"#;

/// Built-in tagging system prompt for tag trees that carry per-category guidance.
///
/// `default_system_prompt_for_tag_tree` returns this constant when the rendered
/// tag tree contains a `Description: ` line (the substring `"\nDescription: "`);
/// otherwise the plain `SYSTEM_PROMPT` is used. The text tells the model that the
/// categories are listed "with optional guidance" so it knows how to read those
/// lines. The string is sent to the model verbatim, so any wording change here
/// changes tagging behavior.
const SYSTEM_PROMPT_WITH_GUIDANCE: &str = r#"You are a knowledge management assistant that categorizes text with tags.

PURPOSE OF TAGS:
Tags help users navigate and filter their content. Users can browse by tag and generate wiki articles that synthesize all content under a tag. Only add a tag if you believe strongly that the user would want this content categorized and filterable by that tag.

IMPORTANT:
- Each tag MUST have a parent_name set to one of the existing top-level categories shown below
- DO NOT create new top-level categories - only use the ones the user has provided below
- Tag names are case-insensitive and globally unique

The user has chosen which top-level categories the auto-tagger may extend. They are listed below with optional guidance and a sample of existing sub-tags under each, as a point of reference for the kinds of tags in this system.

HIERARCHY STRUCTURE:
- Level 1: Categories (shown below) - use ONLY these existing categories as parent_name
- Level 2: Specific tags you create under those categories
- Maximum 2 levels - no deeper nesting

RESPONSE FORMAT:
Return a JSON object with a "tags" array. Each tag is an object with "name" and "parent_name", where parent_name is one of the categories shown below:
{"tags": [{"name": "<specific tag>", "parent_name": "<category from the list below>"}]}

Guidelines:
- Create new Level 2 tags under the user's existing categories when needed
- Prefer broad tags rather than overly specific ones (e.g., "John Smith" instead of "Early Life of John Smith")
- Every tag must have a valid parent_name from the top-level categories listed below
- If none of the categories below feel like a natural fit for the content, return an empty tag list rather than forcing a poor match"#;

/// Chooses the built-in tagging system prompt that matches a rendered tag tree.
///
/// Returns `SYSTEM_PROMPT_WITH_GUIDANCE` when `tag_tree_json` contains the
/// substring `"\nDescription: "` (a `Description: ` entry that follows a line
/// break), and `SYSTEM_PROMPT` in every other case, including an empty tree or
/// a tree whose very first characters are `Description: `.
fn default_system_prompt_for_tag_tree(tag_tree_json: &str) -> &'static str {
    // The leading newline is part of the marker: only a description that
    // starts its own line after some preceding line counts as guidance.
    const GUIDANCE_MARKER: &str = "\nDescription: ";

    if !tag_tree_json.contains(GUIDANCE_MARKER) {
        return SYSTEM_PROMPT;
    }
    SYSTEM_PROMPT_WITH_GUIDANCE
}

/// JSON schema for tag extraction calls. Shared by `extract_tags_from_content`
/// and `extract_tags_from_chunk`. Kept portable: all properties required,
/// `additionalProperties: false`, no unions. See `providers::structured::lint_schema`
Expand Down Expand Up @@ -261,8 +296,8 @@ pub async fn extract_tags_from_content(
tag_tree_json: &str,
model: &str,
supported_params: Option<Vec<String>>,
custom_system_prompt: Option<&str>,
) -> Result<Vec<TagApplication>, String> {
// Truncate based on provider's context length
let max_chars = max_tagging_chars(provider_config, tag_tree_json, model);
let text = if content.len() > max_chars {
// Find the nearest char boundary at or before max_chars
Expand All @@ -280,7 +315,10 @@ pub async fn extract_tags_from_content(
tag_tree_json, text
);

let messages = vec![Message::system(SYSTEM_PROMPT), Message::user(user_content)];
let system = custom_system_prompt
.filter(|s| !s.is_empty())
.unwrap_or_else(|| default_system_prompt_for_tag_tree(tag_tree_json));
let messages = vec![Message::system(system), Message::user(user_content)];

let call = StructuredCall::<ExtractionResult>::new(
provider_config,
Expand Down Expand Up @@ -314,7 +352,10 @@ pub async fn extract_tags_from_chunk(
tag_tree_json, chunk_content
);

let messages = vec![Message::system(SYSTEM_PROMPT), Message::user(user_content)];
let messages = vec![
Message::system(default_system_prompt_for_tag_tree(tag_tree_json)),
Message::user(user_content),
];

let call = StructuredCall::<ExtractionResult>::new(
provider_config,
Expand Down Expand Up @@ -347,11 +388,16 @@ pub fn get_tag_tree_for_llm(conn: &Connection) -> Result<String, String> {
// Tags without is_autotag_target = 1 are intentionally excluded so the
// auto-tagger only extends categories the user has opted into.
let mut top_level_stmt = conn
.prepare("SELECT id, name FROM tags WHERE parent_id IS NULL AND is_autotag_target = 1 ORDER BY name")
.prepare(
"SELECT id, name, autotag_description
FROM tags
WHERE parent_id IS NULL AND is_autotag_target = 1
ORDER BY name",
)
.map_err(|e| format!("Failed to prepare top-level tag query: {}", e))?;

let top_level_tags: Vec<(String, String)> = top_level_stmt
.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
let top_level_tags: Vec<(String, String, String)> = top_level_stmt
.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
.map_err(|e| format!("Failed to query top-level tags: {}", e))?
.collect::<Result<Vec<_>, _>>()
.map_err(|e| format!("Failed to collect top-level tags: {}", e))?;
Expand All @@ -363,10 +409,16 @@ pub fn get_tag_tree_for_llm(conn: &Connection) -> Result<String, String> {
// Step 2: For each top-level tag, get top 10 most-used child tags by atom count
let mut result = String::new();

for (i, (parent_id, parent_name)) in top_level_tags.iter().enumerate() {
for (i, (parent_id, parent_name, description)) in top_level_tags.iter().enumerate() {
// Add the top-level category
result.push_str(parent_name);
result.push('\n');
let description = description.trim();
if !description.is_empty() {
result.push_str("Description: ");
result.push_str(description);
result.push('\n');
}

// Query top 10 children by atom count
let mut children_stmt = conn
Expand Down Expand Up @@ -748,6 +800,40 @@ mod tests {
// Should have tree format
assert!(result.contains("Topics"), "Should contain parent tag");
assert!(result.contains("AI"), "Should contain child tag");
assert!(
!result.contains("Description:"),
"Should not include description lines when no guidance is configured"
);
}

/// A top-level auto-tag target with a non-empty `autotag_description` must be
/// rendered with a `Description: ` line, and that rendering must select the
/// guidance-aware system prompt.
#[test]
fn test_get_tag_tree_for_llm_includes_autotag_description_when_present() {
    let (db, _temp) = create_test_db();
    let conn = db.conn.lock().unwrap();

    // Seed one opted-in top-level category that carries guidance text.
    let category_id = uuid::Uuid::new_v4().to_string();
    let created_at = chrono::Utc::now().to_rfc3339();
    let insert_sql = "INSERT INTO tags (id, name, parent_id, created_at, is_autotag_target, autotag_description)
VALUES (?1, ?2, NULL, ?3, 1, ?4)";
    conn.execute(
        insert_sql,
        rusqlite::params![
            &category_id,
            "Topics",
            &created_at,
            "Use for subject-matter themes, not people or organizations."
        ],
    )
    .unwrap();

    let tree = get_tag_tree_for_llm(&conn).unwrap();

    // The category name and its guidance line both appear in the rendered tree.
    let expected_line = "Description: Use for subject-matter themes, not people or organizations.";
    assert!(tree.contains("Topics"));
    assert!(tree.contains(expected_line));

    // A tree with a description line switches the default prompt to the guidance variant.
    assert_eq!(
        default_system_prompt_for_tag_tree(&tree),
        SYSTEM_PROMPT_WITH_GUIDANCE
    );
}

#[test]
Expand Down
Loading
Loading