From 16a0618264b4ac224cfcb696d0d60634855c1200 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 12:51:44 +0200 Subject: [PATCH 01/14] feat: add support for local files text dictionaries --- .../codebook/src/dictionaries/dictionary.rs | 15 +++++--- crates/codebook/src/dictionaries/manager.rs | 34 ++++++++++++++----- crates/codebook/src/dictionaries/repo.rs | 30 ++++++++++------ 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/crates/codebook/src/dictionaries/dictionary.rs b/crates/codebook/src/dictionaries/dictionary.rs index 0feccba..3daff00 100644 --- a/crates/codebook/src/dictionaries/dictionary.rs +++ b/crates/codebook/src/dictionaries/dictionary.rs @@ -2,6 +2,7 @@ use lru::LruCache; use std::{ collections::HashSet, + io, num::NonZeroUsize, path::PathBuf, sync::{Arc, RwLock}, @@ -158,11 +159,6 @@ impl TextDictionary { .collect(); Self { words } } - pub fn new_from_path(path: &PathBuf) -> Self { - let word_list = std::fs::read_to_string(path) - .unwrap_or_else(|_| panic!("Failed to read dictionary file: {}", path.display())); - Self::new(&word_list) - } /// Get a reference to the internal HashSet for batch operations pub fn word_set(&self) -> &HashSet { @@ -170,6 +166,15 @@ impl TextDictionary { } } +impl TryFrom<&PathBuf> for TextDictionary { + type Error = io::Error; + + fn try_from(value: &PathBuf) -> Result { + let word_list = std::fs::read_to_string(value)?; + Ok(Self::new(&word_list)) + } +} + /// Integration helper to use any Dictionary trait with optimized batch processing pub fn find_locations_with_dictionary_batch( text: &str, diff --git a/crates/codebook/src/dictionaries/manager.rs b/crates/codebook/src/dictionaries/manager.rs index f6a4728..31f77f0 100644 --- a/crates/codebook/src/dictionaries/manager.rs +++ b/crates/codebook/src/dictionaries/manager.rs @@ -1,6 +1,7 @@ use std::{ collections::HashMap, path::PathBuf, + str::FromStr, sync::{Arc, RwLock}, }; @@ -82,17 +83,32 @@ impl DictionaryManager { } fn 
get_text_dictionary(&self, repo: TextRepo) -> Option> { - if repo.text.is_some() { - return Some(Arc::new(TextDictionary::new(repo.text.unwrap()))); - } - let text_path = match self.downloader.get(&repo.url.unwrap()) { - Ok(path) => path, - Err(e) => { - error!("Error: {e:?}"); - return None; + const FAILED_TO_READ_DICT_ERR: &'static str = "Failed to read dictionary file"; + + let dict = match repo.text_location { + super::repo::TextRepoLocation::Url(url) => { + let text_path = self + .downloader + .get(&url) + .inspect_err(|e| error!("Error: {e:?}")) + .ok()?; + + TextDictionary::try_from(&text_path) + .inspect_err(|_| error!("{}: {}", FAILED_TO_READ_DICT_ERR, text_path.display())) + .ok()? } + super::repo::TextRepoLocation::LocalFile(path) => { + let text_path = PathBuf::from_str(&path) + .inspect_err(|e| error!("Error: {e:?}")) + .ok()?; + + TextDictionary::try_from(&text_path) + .inspect_err(|_| error!("{}: {}", FAILED_TO_READ_DICT_ERR, text_path.display())) + .ok()? + } + super::repo::TextRepoLocation::Text(text) => TextDictionary::new(text), }; - let dict = TextDictionary::new_from_path(&text_path); + Some(Arc::new(dict)) } } diff --git a/crates/codebook/src/dictionaries/repo.rs b/crates/codebook/src/dictionaries/repo.rs index dd4c2b9..f381d3d 100644 --- a/crates/codebook/src/dictionaries/repo.rs +++ b/crates/codebook/src/dictionaries/repo.rs @@ -19,18 +19,23 @@ impl HunspellRepo { } } +#[derive(Clone, Debug)] +pub enum TextRepoLocation { + Url(String), + Text(&'static str), + LocalFile(String), +} + #[derive(Clone, Debug)] pub struct TextRepo { - pub url: Option, - pub text: Option<&'static str>, + pub text_location: TextRepoLocation, pub name: String, } impl TextRepo { - pub fn new(name: &str, url: &str) -> Self { + pub fn new_url_repo(name: &str, url: &str) -> Self { Self { - url: Some(url.to_string()), - text: None, + text_location: TextRepoLocation::Url(url.to_string()), name: name.to_string(), } } @@ -119,24 +124,27 @@ static HUNSPELL_DICTIONARIES: 
LazyLock> = LazyLock::new(|| { static TEXT_DICTIONARIES: LazyLock> = LazyLock::new(|| { vec![ - TextRepo::new( + TextRepo::new_url_repo( "rust", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/rust/dict/rust.txt", ), - TextRepo::new( + TextRepo::new_url_repo( "software_terms", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/software-terms/dict/softwareTerms.txt", ), - TextRepo::new( + TextRepo::new_url_repo( "computing_acronyms", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/software-terms/dict/computing-acronyms.txt", ), TextRepo { name: "codebook".to_string(), - text: Some(CODEBOOK_DICTIONARY), - url: None, + text_location: TextRepoLocation::Text(CODEBOOK_DICTIONARY), + }, + TextRepo { + name: "codebook".to_string(), + text_location: TextRepoLocation::Text(CODEBOOK_DICTIONARY), }, - TextRepo::new( + TextRepo::new_url_repo( "csharp", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/csharp/csharp.txt", ), From 268d1d5a807ed8ef5094ecf3d08e2e30794d8c33 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 13:46:39 +0200 Subject: [PATCH 02/14] feat(codebook-config): add custom dict defs --- crates/codebook-config/src/settings.rs | 83 +++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 9173b98..5125970 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -1,10 +1,27 @@ use serde::{Deserialize, Serialize}; + +#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] +pub struct CustomDictionariesDefinitions { + /// The name of the custom dictionary + name: String, + + /// Relative path to the custom dictionary + path: String, + + /// Allow adding words to this dictionary + 
allow_add_words: bool, +} + #[derive(Debug, Serialize, Clone, PartialEq)] pub struct ConfigSettings { /// List of dictionaries to use for spell checking #[serde(default, skip_serializing_if = "Vec::is_empty")] pub dictionaries: Vec, + /// List of custom dictionaries to use for spell checking + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub custom_dictionaries_definitions: Vec, + /// Custom allowlist of words #[serde(default, skip_serializing_if = "Vec::is_empty")] pub words: Vec, @@ -56,6 +73,7 @@ impl Default for ConfigSettings { fn default() -> Self { Self { dictionaries: vec![], + custom_dictionaries_definitions: vec![], words: Vec::new(), flag_words: Vec::new(), ignore_paths: Vec::new(), @@ -79,6 +97,8 @@ impl<'de> Deserialize<'de> for ConfigSettings { #[serde(default)] dictionaries: Vec, #[serde(default)] + custom_dictionaries_definitions: Vec, + #[serde(default)] words: Vec, #[serde(default)] flag_words: Vec, @@ -95,6 +115,7 @@ impl<'de> Deserialize<'de> for ConfigSettings { let helper = Helper::deserialize(deserializer)?; Ok(ConfigSettings { dictionaries: to_lowercase_vec(helper.dictionaries), + custom_dictionaries_definitions: helper.custom_dictionaries_definitions, words: to_lowercase_vec(helper.words), flag_words: to_lowercase_vec(helper.flag_words), ignore_paths: helper.ignore_paths, @@ -106,10 +127,12 @@ impl<'de> Deserialize<'de> for ConfigSettings { } impl ConfigSettings { - /// Merge another config settings into this one, sorting and deduplicating all collections + /// Merge another config settings into this one, sorting and deduplicating all collections, prioritizing self when possible pub fn merge(&mut self, other: ConfigSettings) { // Add items from the other config self.dictionaries.extend(other.dictionaries); + self.custom_dictionaries_definitions + .extend(other.custom_dictionaries_definitions); self.words.extend(other.words); self.flag_words.extend(other.flag_words); self.ignore_paths.extend(other.ignore_paths); @@ -131,6 
+154,9 @@ impl ConfigSettings { pub fn sort_and_dedup(&mut self) { // Sort and deduplicate each Vec sort_and_dedup(&mut self.dictionaries); + sort_and_dedup_by(&mut self.custom_dictionaries_definitions, |d1, d2| { + d1.name.cmp(&d2.name) + }); sort_and_dedup(&mut self.words); sort_and_dedup(&mut self.flag_words); sort_and_dedup(&mut self.ignore_paths); @@ -144,10 +170,26 @@ fn sort_and_dedup(vec: &mut Vec) { vec.dedup(); } +pub fn sort_and_dedup_by(vec: &mut Vec, f: F) +where + F: Fn(&T, &T) -> std::cmp::Ordering, +{ + vec.sort_by(&f); + vec.dedup_by(|d1, d2| f(d1, d2) == std::cmp::Ordering::Equal); +} + #[cfg(test)] mod tests { use super::*; + fn build_fake_custom_dict(name: &str) -> CustomDictionariesDefinitions { + CustomDictionariesDefinitions { + name: name.into(), + path: name.into(), + allow_add_words: false, + } + } + #[test] fn test_default() { let config = ConfigSettings::default(); @@ -221,8 +263,14 @@ mod tests { #[test] fn test_merge() { + let mut duplicate_custom_dict = build_fake_custom_dict("duplicate"); + let mut base = ConfigSettings { dictionaries: vec!["en_us".to_string()], + custom_dictionaries_definitions: vec![ + build_fake_custom_dict("base_unique"), + duplicate_custom_dict.clone(), + ], words: vec!["codebook".to_string()], flag_words: vec!["todo".to_string()], ignore_paths: vec!["**/*.md".to_string()], @@ -231,8 +279,15 @@ mod tests { min_word_length: 3, }; + // flip allow_add_words to true, to create a disparity between the dictionaries + duplicate_custom_dict.allow_add_words = !duplicate_custom_dict.allow_add_words; + let other = ConfigSettings { dictionaries: vec!["en_gb".to_string(), "en_us".to_string()], + custom_dictionaries_definitions: vec![ + duplicate_custom_dict.clone(), + build_fake_custom_dict("other_unique"), + ], words: vec!["rust".to_string()], flag_words: vec!["fixme".to_string()], ignore_paths: vec!["target/".to_string()], @@ -245,6 +300,13 @@ mod tests { // After merging and deduplicating, we should have combined items 
assert_eq!(base.dictionaries, vec!["en_gb", "en_us"]); + assert_eq!( + base.custom_dictionaries_definitions + .iter() + .map(|d| d.name.clone()) + .collect::>(), + vec!["base_unique", "duplicate", "other_unique"] + ); assert_eq!(base.words, vec!["codebook", "rust"]); assert_eq!(base.flag_words, vec!["fixme", "todo"]); assert_eq!(base.ignore_paths, vec!["**/*.md", "target/"]); @@ -258,6 +320,12 @@ mod tests { assert!(base.use_global); // min_word_length from other should override base (since it's non-default) assert_eq!(base.min_word_length, 2); + + // Assert that base custom_dictionaries_definitions took priority + assert_ne!( + base.custom_dictionaries_definitions.iter().find(|d| d.name == "duplicate").expect("custom_dictionaries_definitions duplicate must be present if set in ether of the merged dictionaries").allow_add_words + ,duplicate_custom_dict.allow_add_words + ); } #[test] @@ -288,6 +356,11 @@ mod tests { "en_us".to_string(), "en_gb".to_string(), ], + custom_dictionaries_definitions: vec![ + build_fake_custom_dict("custom_1"), + build_fake_custom_dict("custom_2"), + build_fake_custom_dict("custom_1"), + ], words: vec![ "rust".to_string(), "codebook".to_string(), @@ -311,6 +384,14 @@ mod tests { config.sort_and_dedup(); assert_eq!(config.dictionaries, vec!["en_gb", "en_us"]); + assert_eq!( + config + .custom_dictionaries_definitions + .iter() + .map(|d| d.name.clone()) + .collect::>(), + vec!["custom_1", "custom_2"] + ); assert_eq!(config.words, vec!["codebook", "rust"]); assert_eq!(config.flag_words, vec!["fixme", "todo"]); assert_eq!(config.ignore_paths, vec!["**/*.md", "target/"]); From 6e5615ec31aa509831b50b253db67da7740328b5 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 14:32:35 +0200 Subject: [PATCH 03/14] feat: use custom dicts in spell checking --- crates/codebook-config/src/lib.rs | 12 ++++++++ crates/codebook-config/src/settings.rs | 9 ++++-- crates/codebook/src/dictionaries/manager.rs | 34 ++++++++++++++------- 
crates/codebook/src/lib.rs | 5 ++- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index d7bb976..dcff7fd 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -2,6 +2,7 @@ mod helpers; mod settings; mod watched_file; use crate::settings::ConfigSettings; +pub use crate::settings::CustomDictionariesDefinitions; use crate::watched_file::WatchedFile; use log::debug; use log::info; @@ -24,6 +25,7 @@ pub trait CodebookConfig: Sync + Send + Debug { fn add_word_global(&self, word: &str) -> Result; fn add_ignore(&self, file: &str) -> Result; fn get_dictionary_ids(&self) -> Vec; + fn get_custom_dictionaries_definitions(&self) -> Vec; fn should_ignore_path(&self, path: &Path) -> bool; fn is_allowed_word(&self, word: &str) -> bool; fn should_flag_word(&self, word: &str) -> bool; @@ -496,6 +498,11 @@ impl CodebookConfig for CodebookConfigFile { fn cache_dir(&self) -> &Path { &self.cache_dir } + + fn get_custom_dictionaries_definitions(&self) -> Vec { + let snapshot = self.snapshot(); + snapshot.custom_dictionaries_definitions.clone() + } } #[derive(Debug)] @@ -576,6 +583,11 @@ impl CodebookConfig for CodebookConfigMemory { fn cache_dir(&self) -> &Path { &self.cache_dir } + + fn get_custom_dictionaries_definitions(&self) -> Vec { + let snapshot = self.snapshot(); + snapshot.custom_dictionaries_definitions.clone() + } } #[cfg(test)] diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 5125970..11d6aee 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -3,13 +3,16 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] pub struct CustomDictionariesDefinitions { /// The name of the custom dictionary - name: String, + #[serde(default)] + pub name: String, /// Relative path to the custom dictionary - path: String, + 
#[serde(default)] + pub path: String, /// Allow adding words to this dictionary - allow_add_words: bool, + #[serde(default)] + pub allow_add_words: bool, } #[derive(Debug, Serialize, Clone, PartialEq)] diff --git a/crates/codebook/src/dictionaries/manager.rs b/crates/codebook/src/dictionaries/manager.rs index 31f77f0..59611bf 100644 --- a/crates/codebook/src/dictionaries/manager.rs +++ b/crates/codebook/src/dictionaries/manager.rs @@ -5,10 +5,13 @@ use std::{ sync::{Arc, RwLock}, }; +use crate::dictionaries::repo::TextRepoLocation; + use super::{ dictionary::{self, TextDictionary}, repo::{DictionaryRepo, HunspellRepo, TextRepo, get_repo}, }; +use codebook_config::CustomDictionariesDefinitions; use codebook_downloader::Downloader; use dictionary::{Dictionary, HunspellDictionary}; use log::{debug, error}; @@ -26,19 +29,29 @@ impl DictionaryManager { } } - pub fn get_dictionary(&self, id: &str) -> Option> { + pub fn get_dictionary( + &self, + id: &str, + custom_dicts_defs: &[CustomDictionariesDefinitions], + ) -> Option> { { let cache = self.dictionary_cache.read().unwrap(); if let Some(dictionary) = cache.get(id) { return Some(dictionary.clone()); } } - let repo = match get_repo(id) { - Some(r) => r, - None => { + + let repo = if let Some(custom_dict) = custom_dicts_defs.iter().find(|d| d.name == id) { + DictionaryRepo::Text(TextRepo { + name: custom_dict.name.clone(), + text_location: TextRepoLocation::LocalFile(custom_dict.path.clone()), + }) + } else { + let repo = get_repo(id); + if repo.is_none() { debug!("Failed to get repo for dictionary, skipping: {id}"); - return None; } + repo? 
}; let dictionary: Option> = match repo { @@ -47,13 +60,12 @@ impl DictionaryManager { }; let mut cache = self.dictionary_cache.write().unwrap(); - match dictionary { - Some(d) => { - cache.insert(id.to_string(), d.clone()); - Some(d) - } - None => None, + + if let Some(dictionary) = &dictionary { + cache.insert(id.to_string(), dictionary.clone()); } + + dictionary } fn get_hunspell_dictionary(&self, repo: HunspellRepo) -> Option> { diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index 2fd2120..7e4203d 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -103,7 +103,10 @@ impl Codebook { let mut dictionaries = Vec::with_capacity(dictionary_ids.len()); debug!("Checking text with dictionaries: {dictionary_ids:?}"); for dictionary_id in dictionary_ids { - let dictionary = self.manager.get_dictionary(&dictionary_id); + let dictionary = self.manager.get_dictionary( + &dictionary_id, + &self.config.get_custom_dictionaries_definitions(), + ); if let Some(d) = dictionary { dictionaries.push(d); } From b99d5e95bb40bfedc10af4d33afef5b2003cdb04 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 16:33:27 +0200 Subject: [PATCH 04/14] fixup! 
feat: use custom dicts in spell checking --- crates/codebook/src/dictionaries/repo.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/codebook/src/dictionaries/repo.rs b/crates/codebook/src/dictionaries/repo.rs index f381d3d..a666a01 100644 --- a/crates/codebook/src/dictionaries/repo.rs +++ b/crates/codebook/src/dictionaries/repo.rs @@ -140,10 +140,6 @@ static TEXT_DICTIONARIES: LazyLock> = LazyLock::new(|| { name: "codebook".to_string(), text_location: TextRepoLocation::Text(CODEBOOK_DICTIONARY), }, - TextRepo { - name: "codebook".to_string(), - text_location: TextRepoLocation::Text(CODEBOOK_DICTIONARY), - }, TextRepo::new_url_repo( "csharp", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/csharp/csharp.txt", From fe45f3843165a6fb709b2ced9e41f463ca5c2e98 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 16:34:03 +0200 Subject: [PATCH 05/14] fixup! fixup! feat: use custom dicts in spell checking --- crates/codebook-config/src/lib.rs | 23 +++++++++++- crates/codebook-config/src/settings.rs | 51 +++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index dcff7fd..ac5f1e3 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -217,14 +217,23 @@ impl CodebookConfigFile { project_config: &WatchedFile, global_config: &WatchedFile, ) -> ConfigSettings { - let project = project_config + let mut project = project_config .content() .cloned() .unwrap_or_else(ConfigSettings::default); + if let Some(path) = project_config.path() { + project.try_normalizing_relative_paths(path); + } + if project.use_global { if let Some(global) = global_config.content() { let mut effective = global.clone(); + + if let Some(path) = global_config.path() { + project.try_normalizing_relative_paths(path); + } + effective.merge(project); effective } else { @@ -527,6 +536,18 @@ impl 
CodebookConfigMemory { cache_dir: env::temp_dir().join(CACHE_DIR), } } + + pub fn add_dict_id(&self, id: &str) { + let mut settings = self.settings.write().unwrap(); + settings.dictionaries.push(id.into()); + settings.sort_and_dedup(); + } + + pub fn add_custom_dict(&self, custom_dict: CustomDictionariesDefinitions) { + let mut settings = self.settings.write().unwrap(); + settings.custom_dictionaries_definitions.push(custom_dict); + settings.sort_and_dedup(); + } } impl CodebookConfigMemory { diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 11d6aee..4f470e2 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -1,3 +1,5 @@ +use std::path::{self, Path}; + use serde::{Deserialize, Serialize}; #[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] @@ -6,7 +8,7 @@ pub struct CustomDictionariesDefinitions { #[serde(default)] pub name: String, - /// Relative path to the custom dictionary + /// An absolute or relative path to the custom dictionary #[serde(default)] pub path: String, @@ -165,6 +167,21 @@ impl ConfigSettings { sort_and_dedup(&mut self.ignore_paths); sort_and_dedup(&mut self.ignore_patterns); } + + pub fn try_normalizing_relative_paths(&mut self, config_path: &Path) { + let config_path = config_path.parent(); + if config_path.is_none() { + return; + } + let config_path = config_path.unwrap(); + + for current_dict in &mut self.custom_dictionaries_definitions { + let custom_dict_path = Path::new(¤t_dict.path); + if let Ok(path) = path::absolute(config_path.join(custom_dict_path)) { + current_dict.path = path.to_str().unwrap().to_string(); + } + } + } } /// Helper function to sort and deduplicate a Vec of strings @@ -439,4 +456,36 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); } + + #[test] + fn test_relative_path_normalization() { + let base_config_path = "/tmp/coodbook.toml"; + let absolute_dict_path = 
"/absolute_dict.txt"; + let relative_dict_path = "./relative_dict.txt"; + let expected_relative_path = "/tmp/relative_dict.txt"; + + let mut config = ConfigSettings::default(); + + let mut relative_custom_dict = CustomDictionariesDefinitions::default(); + relative_custom_dict.path = relative_dict_path.to_string(); + let mut absolute_dict = CustomDictionariesDefinitions::default(); + absolute_dict.path = absolute_dict_path.to_string(); + + config.custom_dictionaries_definitions.push(absolute_dict); + + config + .custom_dictionaries_definitions + .push(relative_custom_dict); + + config.try_normalizing_relative_paths(Path::new(base_config_path)); + + assert_eq!( + config + .custom_dictionaries_definitions + .iter() + .map(|d| d.path.clone()) + .collect::>(), + vec![absolute_dict_path, expected_relative_path] + ); + } } From 846a034dc7362e3b64a68d44fa8a1d851608a133 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 16:34:06 +0200 Subject: [PATCH 06/14] test(custom_dict): test custom dict behavior --- .../codebook/tests/examples/custom_dict.txt | 1 + crates/codebook/tests/test_custom_dicts.rs | 49 +++++++++++++++++++ crates/codebook/tests/test_files.rs | 7 +-- crates/codebook/tests/utils/mod.rs | 5 ++ 4 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 crates/codebook/tests/examples/custom_dict.txt create mode 100644 crates/codebook/tests/test_custom_dicts.rs diff --git a/crates/codebook/tests/examples/custom_dict.txt b/crates/codebook/tests/examples/custom_dict.txt new file mode 100644 index 0000000..ce7739a --- /dev/null +++ b/crates/codebook/tests/examples/custom_dict.txt @@ -0,0 +1 @@ +mycustomcorrectword diff --git a/crates/codebook/tests/test_custom_dicts.rs b/crates/codebook/tests/test_custom_dicts.rs new file mode 100644 index 0000000..8342d21 --- /dev/null +++ b/crates/codebook/tests/test_custom_dicts.rs @@ -0,0 +1,49 @@ +use codebook::{Codebook, queries::LanguageType}; +use codebook_config::{CodebookConfigMemory, 
CustomDictionariesDefinitions}; +use std::sync::Arc; + +mod utils; +use crate::utils::example_file_path; + +const CUSTOM_WORD: &'static str = "mycustomcorrectword"; + +pub fn get_processor(enable_custom_dict: bool) -> Codebook { + let config = Arc::new(CodebookConfigMemory::default()); + + let custom_dict_name = "my_dict"; + let custom_dict_path = example_file_path("custom_dict.txt"); + let custom_dict = CustomDictionariesDefinitions { + name: custom_dict_name.to_owned(), + path: custom_dict_path, + allow_add_words: false, + }; + config.add_custom_dict(custom_dict); + + if enable_custom_dict { + config.add_dict_id(&custom_dict_name); + } + + Codebook::new(config.clone()).unwrap() +} + +#[test] +fn test_custom_dict_unused_if_not_added_to_dicts() { + let processor = get_processor(false); + let misspelled = processor + .spell_check(CUSTOM_WORD, Some(LanguageType::Text), None) + .to_vec(); + + assert_eq!(misspelled[0].word, CUSTOM_WORD); +} + +#[test] +fn test_custom_dict_used_if_added_to_dicts() { + let processor = get_processor(true); + + let misspelled = processor + .spell_check(CUSTOM_WORD, Some(LanguageType::Text), None) + .to_vec(); + + // active custom dict + assert!(misspelled.is_empty()); +} diff --git a/crates/codebook/tests/test_files.rs b/crates/codebook/tests/test_files.rs index 8ac409f..ec4174c 100644 --- a/crates/codebook/tests/test_files.rs +++ b/crates/codebook/tests/test_files.rs @@ -3,12 +3,9 @@ use codebook::{ queries::LanguageType, }; -mod utils; +use crate::utils::example_file_path; -fn example_file_path(file: &str) -> String { - // get root of the project through CARGO_MANIFEST_DIR - format!("tests/examples/{file}") -} +mod utils; #[test] fn test_ignore_file() { diff --git a/crates/codebook/tests/utils/mod.rs b/crates/codebook/tests/utils/mod.rs index d86efce..359ae83 100644 --- a/crates/codebook/tests/utils/mod.rs +++ b/crates/codebook/tests/utils/mod.rs @@ -15,3 +15,8 @@ pub fn get_processor() -> Codebook { pub fn init_logging() { let _ = 
env_logger::builder().is_test(true).try_init(); } + +pub fn example_file_path(file: &str) -> String { + // get root of the project through CARGO_MANIFEST_DIR + format!("tests/examples/{file}") +} From ad92752a4453b9857e15e2d8feedad2e3f094d6b Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 18:50:09 +0200 Subject: [PATCH 07/14] test: absolute and relative custom dir manager tests --- crates/codebook-config/src/lib.rs | 57 +++++++++++++++++++-- crates/codebook-config/src/settings.rs | 14 ++--- crates/codebook/src/dictionaries/manager.rs | 4 +- crates/codebook/tests/test_custom_dicts.rs | 4 +- 4 files changed, 63 insertions(+), 16 deletions(-) diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index ac5f1e3..19261c3 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -2,7 +2,7 @@ mod helpers; mod settings; mod watched_file; use crate::settings::ConfigSettings; -pub use crate::settings::CustomDictionariesDefinitions; +pub use crate::settings::CustomDictionariesEntry; use crate::watched_file::WatchedFile; use log::debug; use log::info; @@ -25,7 +25,7 @@ pub trait CodebookConfig: Sync + Send + Debug { fn add_word_global(&self, word: &str) -> Result; fn add_ignore(&self, file: &str) -> Result; fn get_dictionary_ids(&self) -> Vec; - fn get_custom_dictionaries_definitions(&self) -> Vec; + fn get_custom_dictionaries_definitions(&self) -> Vec; fn should_ignore_path(&self, path: &Path) -> bool; fn is_allowed_word(&self, word: &str) -> bool; fn should_flag_word(&self, word: &str) -> bool; @@ -508,7 +508,7 @@ impl CodebookConfig for CodebookConfigFile { &self.cache_dir } - fn get_custom_dictionaries_definitions(&self) -> Vec { + fn get_custom_dictionaries_definitions(&self) -> Vec { let snapshot = self.snapshot(); snapshot.custom_dictionaries_definitions.clone() } @@ -543,7 +543,7 @@ impl CodebookConfigMemory { settings.sort_and_dedup(); } - pub fn add_custom_dict(&self, custom_dict: 
CustomDictionariesDefinitions) { + pub fn add_custom_dict(&self, custom_dict: CustomDictionariesEntry) { let mut settings = self.settings.write().unwrap(); settings.custom_dictionaries_definitions.push(custom_dict); settings.sort_and_dedup(); @@ -605,7 +605,7 @@ impl CodebookConfig for CodebookConfigMemory { &self.cache_dir } - fn get_custom_dictionaries_definitions(&self) -> Vec { + fn get_custom_dictionaries_definitions(&self) -> Vec { let snapshot = self.snapshot(); snapshot.custom_dictionaries_definitions.clone() } @@ -1099,4 +1099,51 @@ mod tests { Ok(()) } + + #[test] + fn test_normalization_of_custom_dict_paths() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + let relative_custom_dict_path = temp_dir.path().join("custom_rel.txt"); + let absolute_custom_dict_path = temp_dir.path().join("custom_abs.txt"); + let mut file = File::create(&config_path)?; + File::create(&relative_custom_dict_path)?; + File::create(&absolute_custom_dict_path)?; + + let expected = vec![ + CustomDictionariesEntry { + name: "absolute".to_owned(), + path: absolute_custom_dict_path.to_str().unwrap().to_string(), + allow_add_words: true, + }, + CustomDictionariesEntry { + name: "relative".to_owned(), + path: relative_custom_dict_path.to_str().unwrap().to_string(), + allow_add_words: false, + }, + ]; + + let a = format!( + r#" + [[custom_dictionaries_definitions]] + name = "absolute" + path = "{}" + allow_add_words = true + + [[custom_dictionaries_definitions]] + name = "relative" + path = "{}" + allow_add_words = false + "#, + absolute_custom_dict_path.display(), + relative_custom_dict_path.display(), + ); + file.write_all(a.as_bytes())?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + let custom_dicts = config.snapshot().custom_dictionaries_definitions.clone(); + assert_eq!(expected, custom_dicts); + + Ok(()) + } } diff --git a/crates/codebook-config/src/settings.rs 
b/crates/codebook-config/src/settings.rs index 4f470e2..4d340cb 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -3,7 +3,7 @@ use std::path::{self, Path}; use serde::{Deserialize, Serialize}; #[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] -pub struct CustomDictionariesDefinitions { +pub struct CustomDictionariesEntry { /// The name of the custom dictionary #[serde(default)] pub name: String, @@ -25,7 +25,7 @@ pub struct ConfigSettings { /// List of custom dictionaries to use for spell checking #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub custom_dictionaries_definitions: Vec, + pub custom_dictionaries_definitions: Vec, /// Custom allowlist of words #[serde(default, skip_serializing_if = "Vec::is_empty")] @@ -102,7 +102,7 @@ impl<'de> Deserialize<'de> for ConfigSettings { #[serde(default)] dictionaries: Vec, #[serde(default)] - custom_dictionaries_definitions: Vec, + custom_dictionaries_definitions: Vec, #[serde(default)] words: Vec, #[serde(default)] @@ -202,8 +202,8 @@ where mod tests { use super::*; - fn build_fake_custom_dict(name: &str) -> CustomDictionariesDefinitions { - CustomDictionariesDefinitions { + fn build_fake_custom_dict(name: &str) -> CustomDictionariesEntry { + CustomDictionariesEntry { name: name.into(), path: name.into(), allow_add_words: false, @@ -466,9 +466,9 @@ mod tests { let mut config = ConfigSettings::default(); - let mut relative_custom_dict = CustomDictionariesDefinitions::default(); + let mut relative_custom_dict = CustomDictionariesEntry::default(); relative_custom_dict.path = relative_dict_path.to_string(); - let mut absolute_dict = CustomDictionariesDefinitions::default(); + let mut absolute_dict = CustomDictionariesEntry::default(); absolute_dict.path = absolute_dict_path.to_string(); config.custom_dictionaries_definitions.push(absolute_dict); diff --git a/crates/codebook/src/dictionaries/manager.rs b/crates/codebook/src/dictionaries/manager.rs 
index 59611bf..1f9eda7 100644 --- a/crates/codebook/src/dictionaries/manager.rs +++ b/crates/codebook/src/dictionaries/manager.rs @@ -11,7 +11,7 @@ use super::{ dictionary::{self, TextDictionary}, repo::{DictionaryRepo, HunspellRepo, TextRepo, get_repo}, }; -use codebook_config::CustomDictionariesDefinitions; +use codebook_config::CustomDictionariesEntry; use codebook_downloader::Downloader; use dictionary::{Dictionary, HunspellDictionary}; use log::{debug, error}; @@ -32,7 +32,7 @@ impl DictionaryManager { pub fn get_dictionary( &self, id: &str, - custom_dicts_defs: &[CustomDictionariesDefinitions], + custom_dicts_defs: &[CustomDictionariesEntry], ) -> Option> { { let cache = self.dictionary_cache.read().unwrap(); diff --git a/crates/codebook/tests/test_custom_dicts.rs b/crates/codebook/tests/test_custom_dicts.rs index 8342d21..38a0677 100644 --- a/crates/codebook/tests/test_custom_dicts.rs +++ b/crates/codebook/tests/test_custom_dicts.rs @@ -1,5 +1,5 @@ use codebook::{Codebook, queries::LanguageType}; -use codebook_config::{CodebookConfigMemory, CustomDictionariesDefinitions}; +use codebook_config::{CodebookConfigMemory, CustomDictionariesEntry}; use std::sync::Arc; mod utils; @@ -12,7 +12,7 @@ pub fn get_processor(enable_custom_dict: bool) -> Codebook { let custom_dict_name = "my_dict"; let custom_dict_path = example_file_path("custom_dict.txt"); - let custom_dict = CustomDictionariesDefinitions { + let custom_dict = CustomDictionariesEntry { name: custom_dict_name.to_owned(), path: custom_dict_path, allow_add_words: false, From 305260d3d8a08634b25e016743ff6e8754ce60a8 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 19:34:54 +0200 Subject: [PATCH 08/14] chore: normalize custom dict names --- crates/codebook-config/src/settings.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 4d340cb..4f8828e 100644 --- 
a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -120,7 +120,14 @@ impl<'de> Deserialize<'de> for ConfigSettings { let helper = Helper::deserialize(deserializer)?; Ok(ConfigSettings { dictionaries: to_lowercase_vec(helper.dictionaries), - custom_dictionaries_definitions: helper.custom_dictionaries_definitions, + custom_dictionaries_definitions: helper + .custom_dictionaries_definitions + .into_iter() + .map(|c| { + c.name.to_ascii_lowercase(); + c + }) + .collect(), words: to_lowercase_vec(helper.words), flag_words: to_lowercase_vec(helper.flag_words), ignore_paths: helper.ignore_paths, From 336894396aacff488d73792b023fcb53eacbfe29 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 20:34:48 +0200 Subject: [PATCH 09/14] feat: add word to custom dict support --- Cargo.lock | 1 + crates/codebook-config/src/lib.rs | 2 +- crates/codebook-config/src/settings.rs | 16 ++--- crates/codebook-lsp/src/lsp.rs | 70 +++++++++++++++++++++ crates/codebook/Cargo.toml | 1 + crates/codebook/src/dictionaries/manager.rs | 5 ++ crates/codebook/src/errors.rs | 12 ++++ crates/codebook/src/lib.rs | 43 ++++++++++++- 8 files changed, 140 insertions(+), 10 deletions(-) create mode 100644 crates/codebook/src/errors.rs diff --git a/Cargo.lock b/Cargo.lock index 552e0aa..54fd0b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -428,6 +428,7 @@ dependencies = [ "spellbook", "streaming-iterator", "tempfile", + "thiserror 2.0.17", "tree-sitter", "tree-sitter-bash", "tree-sitter-c", diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index 19261c3..9529e92 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -231,7 +231,7 @@ impl CodebookConfigFile { let mut effective = global.clone(); if let Some(path) = global_config.path() { - project.try_normalizing_relative_paths(path); + effective.try_normalizing_relative_paths(path); } effective.merge(project); diff --git 
a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 4f8828e..b5a3839 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -123,8 +123,8 @@ impl<'de> Deserialize<'de> for ConfigSettings { custom_dictionaries_definitions: helper .custom_dictionaries_definitions .into_iter() - .map(|c| { - c.name.to_ascii_lowercase(); + .map(|mut c| { + c.name.make_ascii_lowercase(); c }) .collect(), @@ -176,15 +176,15 @@ impl ConfigSettings { } pub fn try_normalizing_relative_paths(&mut self, config_path: &Path) { - let config_path = config_path.parent(); - if config_path.is_none() { + let config_path_parent = if let Some(config_path_parent) = config_path.parent() { + config_path_parent + } else { return; - } - let config_path = config_path.unwrap(); + }; for current_dict in &mut self.custom_dictionaries_definitions { - let custom_dict_path = Path::new(&current_dict.path); - if let Ok(path) = path::absolute(config_path.join(custom_dict_path)) { + if let Ok(path) = path::absolute(config_path_parent.join(Path::new(&current_dict.path))) + { current_dict.path = path.to_str().unwrap().to_string(); } } diff --git a/crates/codebook-lsp/src/lsp.rs b/crates/codebook-lsp/src/lsp.rs index c07f343..a2cdfe6 100644 --- a/crates/codebook-lsp/src/lsp.rs +++ b/crates/codebook-lsp/src/lsp.rs @@ -1,8 +1,10 @@ use std::collections::HashMap; +use std::collections::HashSet; use std::path::Path; use std::str::FromStr as _; use std::sync::Arc; +use codebook::errors::DictModificationError; use codebook::parser::get_word_from_string; use codebook::queries::LanguageType; use string_offsets::AllConfig; @@ -13,6 +15,8 @@ use log::LevelFilter; use log::error; use serde_json::Value; use tokio::task; +use tower_lsp::jsonrpc::Error as RpcError; +use tower_lsp::jsonrpc::ErrorCode; use tower_lsp::jsonrpc::Result as RpcResult; use tower_lsp::lsp_types::*; use tower_lsp::{Client, LanguageServer}; @@ -37,6 +41,7 @@ pub struct Backend { enum
CodebookCommand { AddWord, AddWordGlobal, + AddWordDict, Unknown, } @@ -45,6 +50,7 @@ impl From<&str> for CodebookCommand { match command { "codebook.addWord" => CodebookCommand::AddWord, "codebook.addWordGlobal" => CodebookCommand::AddWordGlobal, + "codebook.addWordDict" => CodebookCommand::AddWordDict, _ => CodebookCommand::Unknown, } } @@ -55,6 +61,7 @@ impl From for String { match command { CodebookCommand::AddWord => "codebook.addWord".to_string(), CodebookCommand::AddWordGlobal => "codebook.addWordGlobal".to_string(), + CodebookCommand::AddWordDict => "codebook.addWordDict".to_string(), CodebookCommand::Unknown => "codebook.unknown".to_string(), } } @@ -95,6 +102,7 @@ impl LanguageServer for Backend { commands: vec![ CodebookCommand::AddWord.into(), CodebookCommand::AddWordGlobal.into(), + CodebookCommand::AddWordDict.into(), ], work_done_progress_options: Default::default(), }), @@ -255,6 +263,35 @@ impl LanguageServer for Backend { disabled: None, data: None, })); + + let active_dict_ids = self + .config + .get_dictionary_ids() + .into_iter() + .collect::>(); + + for custom_dict in self.config.get_custom_dictionaries_definitions() { + if !custom_dict.allow_add_words || !active_dict_ids.contains(&custom_dict.name) { + continue; + } + + let custom_dict_name = custom_dict.name; + let title = format!("Add '{word}' to '{custom_dict_name}' dictionary"); + actions.push(CodeActionOrCommand::CodeAction(CodeAction { + title: title.clone(), + kind: Some(CodeActionKind::QUICKFIX), + diagnostics: None, + edit: None, + command: Some(Command { + title: title, + command: CodebookCommand::AddWordDict.into(), + arguments: Some(vec![custom_dict_name.into(), word.to_string().into()]), + }), + is_preferred: None, + disabled: None, + data: None, + })); + } } match actions.is_empty() { true => Ok(None), @@ -292,6 +329,28 @@ impl LanguageServer for Backend { } Ok(None) } + CodebookCommand::AddWordDict => { + let dict_id = params + .arguments + .first() + .and_then(|arg| 
arg.as_str()) + .ok_or(RpcError::new(ErrorCode::InvalidParams))? + .to_string(); + + let words = params + .arguments + .iter() + .skip(1) + .filter_map(|arg| arg.as_str().map(|s| s.to_string())); + + let updated = self.add_words_to_dict(&dict_id, words); + if updated { + self.codebook.refresh_custom_dictionary(&dict_id); + + self.recheck_all().await; + } + Ok(None) + } CodebookCommand::Unknown => Ok(None), } } @@ -369,6 +428,17 @@ impl Backend { } should_save } + fn add_words_to_dict(&self, dict_id: &str, words: impl Iterator) -> bool { + let mut should_save = false; + for word in words { + match self.codebook.add_word_to_custom_dictionary(&word, dict_id) { + Ok(_) => should_save = true, + Err(e @ DictModificationError::WordAlreadyExists(_)) => info!("{e}"), + Err(e) => error!("{e}"), + }; + } + should_save + } fn make_suggestion(&self, suggestion: &str, range: &Range, uri: &Url) -> CodeAction { let title = format!("Replace with '{suggestion}'"); diff --git a/crates/codebook/Cargo.toml b/crates/codebook/Cargo.toml index 29d94c7..7422bd2 100644 --- a/crates/codebook/Cargo.toml +++ b/crates/codebook/Cargo.toml @@ -52,6 +52,7 @@ tree-sitter-zig.workspace = true tree-sitter-c-sharp.workspace = true tree-sitter.workspace = true unicode-segmentation.workspace = true +thiserror.workspace = true codebook_config.workspace = true codebook_downloader.workspace = true diff --git a/crates/codebook/src/dictionaries/manager.rs b/crates/codebook/src/dictionaries/manager.rs index 1f9eda7..c651194 100644 --- a/crates/codebook/src/dictionaries/manager.rs +++ b/crates/codebook/src/dictionaries/manager.rs @@ -29,6 +29,11 @@ impl DictionaryManager { } } + pub fn invalidate_cache_entry(&self, id: &str) { + let mut cache = self.dictionary_cache.write().unwrap(); + cache.remove(id); + } + pub fn get_dictionary( &self, id: &str, diff --git a/crates/codebook/src/errors.rs b/crates/codebook/src/errors.rs new file mode 100644 index 0000000..e2d9c91 --- /dev/null +++ 
b/crates/codebook/src/errors.rs @@ -0,0 +1,12 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum DictModificationError { + #[error("Failed to write data due to: {0}")] + WriteFailed(#[from] std::io::Error), + #[error("The word '{0}' already exists in the given dictionary")] + WordAlreadyExists(String), + + #[error("The '{0}' dict ID is not present in the configuration")] + UnknownDictID(String), +} diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index 7e4203d..111bf90 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -1,13 +1,16 @@ pub mod dictionaries; +pub mod errors; mod logging; pub mod parser; pub mod queries; pub mod regexes; mod splitter; +use crate::errors::DictModificationError; use crate::regexes::get_default_skip_patterns; -use std::path::Path; +use std::io::Write; use std::sync::Arc; +use std::{fs::File, path::Path}; use codebook_config::CodebookConfig; use dictionaries::{dictionary, manager::DictionaryManager}; @@ -141,6 +144,44 @@ impl Codebook { } Some(collect_round_robin(&suggestions, max_results)) } + + pub fn add_word_to_custom_dictionary( + &self, + word: &str, + dict_id: &str, + ) -> Result<(), DictModificationError> { + let custom_dicts_defs = &self.config.get_custom_dictionaries_definitions(); + + let dict = self.manager.get_dictionary(dict_id, custom_dicts_defs); + + if dict.is_none() { + return Err(DictModificationError::UnknownDictID(dict_id.to_string())); + } + let dict = dict.unwrap(); + + if dict.check(word) { + return Err(DictModificationError::WordAlreadyExists(word.to_string())); + } + + if let Some(custom_dict) = custom_dicts_defs + .iter() + .find(|d| d.allow_add_words && d.name == dict_id) + { + let mut file = File::options() + .append(true) + .create(false) + .open(&custom_dict.path)?; + write!(file, "\n{}", word)?; + } + + Ok(()) + } + + pub fn refresh_custom_dictionary(&self, dict_id: &str) { + self.manager.invalidate_cache_entry(dict_id); + self.manager + 
.get_dictionary(dict_id, &self.config.get_custom_dictionaries_definitions()); + } } fn collect_round_robin(sources: &[Vec], max_count: usize) -> Vec { From 73db52913f12d224c3ae837f29755bfc2c594da7 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Tue, 4 Nov 2025 22:16:16 +0200 Subject: [PATCH 10/14] chore: left comments for later --- crates/codebook/src/lib.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index 111bf90..f8434b7 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -167,11 +167,24 @@ impl Codebook { .iter() .find(|d| d.allow_add_words && d.name == dict_id) { + // FIXME: I am still unsure where to maintain a dict_id to WatchedFile map, for now I + // am just going to use simple file operations to write to the end of the file. + // Also we should make sure we are writing to a text file, a simple solution would be to + // filter out dict paths based on extensions, so if the path isn't ending with .dict + // or .txt we just toss the update away.
let mut file = File::options() + .read(true) .append(true) .create(false) .open(&custom_dict.path)?; - write!(file, "\n{}", word)?; + + // FIXME: we should check if the last byte of the dict is a new line and only prepend + // newlines if its missing, I have bigger fish to fry right now + if file.metadata()?.len() == 0 { + write!(file, "{}", word)?; + } else { + write!(file, "\n{}", word)?; + } } Ok(()) From aa0068a4f9a7c35f12e9d9e28f41f661d5bba549 Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Wed, 5 Nov 2025 00:55:08 +0200 Subject: [PATCH 11/14] refactor: replace try_normalizing_relative_paths with config_file_path --- crates/codebook-config/src/lib.rs | 21 +++--- crates/codebook-config/src/settings.rs | 75 ++++++++------------- crates/codebook/src/dictionaries/manager.rs | 20 +++--- crates/codebook/src/dictionaries/repo.rs | 4 +- crates/codebook/src/lib.rs | 2 +- crates/codebook/tests/test_custom_dicts.rs | 2 +- 6 files changed, 51 insertions(+), 73 deletions(-) diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index 9529e92..8f695a2 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -200,8 +200,11 @@ impl CodebookConfigFile { let path = path.as_ref(); let content = fs::read_to_string(path)?; - match toml::from_str(&content) { - Ok(settings) => Ok(settings), + match toml::from_str::(&content) { + Ok(mut settings) => { + settings.set_config_file_paths(path); + Ok(settings) + } Err(e) => { let err = io::Error::new( ErrorKind::InvalidData, @@ -217,23 +220,15 @@ impl CodebookConfigFile { project_config: &WatchedFile, global_config: &WatchedFile, ) -> ConfigSettings { - let mut project = project_config + let project = project_config .content() .cloned() .unwrap_or_else(ConfigSettings::default); - if let Some(path) = project_config.path() { - project.try_normalizing_relative_paths(path); - } - if project.use_global { if let Some(global) = global_config.content() { let mut effective = 
global.clone(); - if let Some(path) = global_config.path() { - effective.try_normalizing_relative_paths(path); - } - effective.merge(project); effective } else { @@ -1103,7 +1098,7 @@ mod tests { #[test] fn test_normalization_of_custom_dict_paths() -> Result<(), io::Error> { let temp_dir = TempDir::new().unwrap(); - let config_path = temp_dir.path().join("codebook.toml"); + let config_path = Arc::from(temp_dir.path().join("codebook.toml").as_path()); let relative_custom_dict_path = temp_dir.path().join("custom_rel.txt"); let absolute_custom_dict_path = temp_dir.path().join("custom_abs.txt"); let mut file = File::create(&config_path)?; @@ -1115,11 +1110,13 @@ mod tests { name: "absolute".to_owned(), path: absolute_custom_dict_path.to_str().unwrap().to_string(), allow_add_words: true, + config_file_path: Some(config_path.clone()), }, CustomDictionariesEntry { name: "relative".to_owned(), path: relative_custom_dict_path.to_str().unwrap().to_string(), allow_add_words: false, + config_file_path: Some(config_path.clone()), }, ]; diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index b5a3839..ea81d99 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -1,4 +1,8 @@ -use std::path::{self, Path}; +use std::{ + io, + path::{self, Path, PathBuf}, + sync::Arc, +}; use serde::{Deserialize, Serialize}; @@ -15,6 +19,25 @@ pub struct CustomDictionariesEntry { /// Allow adding words to this dictionary #[serde(default)] pub allow_add_words: bool, + + /// For internal use to track the codebook.toml that originated this entry + #[serde(skip)] + pub config_file_path: Option>, +} + +impl CustomDictionariesEntry { + pub fn resolve_full_path(&self) -> Result { + let full_path = if let Some(config_file_path) = &self.config_file_path { + config_file_path + .parent() + .ok_or(io::Error::from(io::ErrorKind::NotFound))?
+ .join(Path::new(&self.path)) + } else { + PathBuf::from(&self.path) + }; + + path::absolute(&full_path) + } } #[derive(Debug, Serialize, Clone, PartialEq)] @@ -175,18 +198,10 @@ impl ConfigSettings { sort_and_dedup(&mut self.ignore_patterns); } - pub fn try_normalizing_relative_paths(&mut self, config_path: &Path) { - let config_path_parent = if let Some(config_path_parent) = config_path.parent() { - config_path_parent - } else { - return; - }; - - for current_dict in &mut self.custom_dictionaries_definitions { - if let Ok(path) = path::absolute(config_path_parent.join(Path::new(&current_dict.path))) - { - current_dict.path = path.to_str().unwrap().to_string(); + pub fn set_config_file_paths(&mut self, config_path: &Path) { + let config_path: Arc = Arc::from(config_path); + for custom_directory in &mut self.custom_dictionaries_definitions { + custom_directory.config_file_path = Some(config_path.clone()); } } } @@ -213,7 +228,7 @@ mod tests { CustomDictionariesEntry { name: name.into(), path: name.into(), - allow_add_words: false, + ..Default::default() } } @@ -463,36 +478,4 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); } - - #[test] - fn test_relative_path_normalization() { - let base_config_path = "/tmp/coodbook.toml"; - let absolute_dict_path = "/absolute_dict.txt"; - let relative_dict_path = "./relative_dict.txt"; - let expected_relative_path = "/tmp/relative_dict.txt"; - - let mut config = ConfigSettings::default(); - - let mut relative_custom_dict = CustomDictionariesEntry::default(); - relative_custom_dict.path = relative_dict_path.to_string(); - let mut absolute_dict = CustomDictionariesEntry::default(); - absolute_dict.path = absolute_dict_path.to_string(); - - config.custom_dictionaries_definitions.push(absolute_dict); - - config - .custom_dictionaries_definitions - .push(relative_custom_dict); - - config.try_normalizing_relative_paths(Path::new(base_config_path)); - - assert_eq!( - config -
.custom_dictionaries_definitions - .iter() - .map(|d| d.path.clone()) - .collect::>(), - vec![absolute_dict_path, expected_relative_path] - ); - } } diff --git a/crates/codebook/src/dictionaries/manager.rs b/crates/codebook/src/dictionaries/manager.rs index c651194..77a8b2d 100644 --- a/crates/codebook/src/dictionaries/manager.rs +++ b/crates/codebook/src/dictionaries/manager.rs @@ -1,7 +1,6 @@ use std::{ collections::HashMap, path::PathBuf, - str::FromStr, sync::{Arc, RwLock}, }; @@ -49,7 +48,12 @@ impl DictionaryManager { let repo = if let Some(custom_dict) = custom_dicts_defs.iter().find(|d| d.name == id) { DictionaryRepo::Text(TextRepo { name: custom_dict.name.clone(), - text_location: TextRepoLocation::LocalFile(custom_dict.path.clone()), + text_location: TextRepoLocation::LocalFile( + custom_dict + .resolve_full_path() + .inspect_err(|e| error!("Failed to build local text repo due to: {e}")) + .ok()?, + ), }) } else { let repo = get_repo(id); @@ -114,15 +118,9 @@ impl DictionaryManager { .inspect_err(|_| error!("{}: {}", FAILED_TO_READ_DICT_ERR, text_path.display())) .ok()? } - super::repo::TextRepoLocation::LocalFile(path) => { - let text_path = PathBuf::from_str(&path) - .inspect_err(|e| error!("Error: {e:?}")) - .ok()?; - - TextDictionary::try_from(&text_path) - .inspect_err(|_| error!("{}: {}", FAILED_TO_READ_DICT_ERR, text_path.display())) - .ok()? 
- } + super::repo::TextRepoLocation::LocalFile(path) => TextDictionary::try_from(&path) + .inspect_err(|_| error!("{}: {}", FAILED_TO_READ_DICT_ERR, path.display())) + .ok()?, super::repo::TextRepoLocation::Text(text) => TextDictionary::new(text), }; diff --git a/crates/codebook/src/dictionaries/repo.rs b/crates/codebook/src/dictionaries/repo.rs index a666a01..2f25aec 100644 --- a/crates/codebook/src/dictionaries/repo.rs +++ b/crates/codebook/src/dictionaries/repo.rs @@ -1,4 +1,4 @@ -use std::sync::LazyLock; +use std::{path::PathBuf, sync::LazyLock}; static CODEBOOK_DICTIONARY: &str = include_str!("./combined.gen.txt"); @@ -23,7 +23,7 @@ impl HunspellRepo { pub enum TextRepoLocation { Url(String), Text(&'static str), - LocalFile(String), + LocalFile(PathBuf), } #[derive(Clone, Debug)] diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index f8434b7..e950a14 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -176,7 +176,7 @@ impl Codebook { .read(true) .append(true) .create(false) - .open(&custom_dict.path)?; + .open(&custom_dict.resolve_full_path()?)?; // FIXME: we should check if the last byte of the dict is a new line and only prepend // newlines if its missing, I have bigger fish to fry right now diff --git a/crates/codebook/tests/test_custom_dicts.rs b/crates/codebook/tests/test_custom_dicts.rs index 38a0677..17748c6 100644 --- a/crates/codebook/tests/test_custom_dicts.rs +++ b/crates/codebook/tests/test_custom_dicts.rs @@ -15,7 +15,7 @@ pub fn get_processor(enable_custom_dict: bool) -> Codebook { let custom_dict = CustomDictionariesEntry { name: custom_dict_name.to_owned(), path: custom_dict_path, - allow_add_words: false, + ..Default::default() }; config.add_custom_dict(custom_dict); From e7724694ab35925898776b04e419cc20457d52cb Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Wed, 5 Nov 2025 21:05:15 +0200 Subject: [PATCH 12/14] chore: convert path to PathBuf --- crates/codebook-config/src/lib.rs | 4 ++-- 
crates/codebook-config/src/settings.rs | 6 +++--- crates/codebook/tests/test_custom_dicts.rs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index 8f695a2..fde91d6 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -1108,13 +1108,13 @@ mod tests { let expected = vec![ CustomDictionariesEntry { name: "absolute".to_owned(), - path: absolute_custom_dict_path.to_str().unwrap().to_string(), + path: absolute_custom_dict_path.clone(), allow_add_words: true, config_file_path: Some(config_path.clone()), }, CustomDictionariesEntry { name: "relative".to_owned(), - path: relative_custom_dict_path.to_str().unwrap().to_string(), + path: relative_custom_dict_path.clone(), allow_add_words: false, config_file_path: Some(config_path.clone()), }, diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index ea81d99..d278562 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -14,7 +14,7 @@ pub struct CustomDictionariesEntry { /// An absolute or relative path to the custom dictionary #[serde(default)] - pub path: String, + pub path: PathBuf, /// Allow adding words to this dictionary #[serde(default)] @@ -31,9 +31,9 @@ impl CustomDictionariesEntry { config_file_path .parent() .ok_or(io::Error::from(io::ErrorKind::NotFound))? 
- .join(Path::new(&self.path)) + .join(&self.path) } else { - PathBuf::from(&self.path) + self.path.clone() }; path::absolute(&full_path) diff --git a/crates/codebook/tests/test_custom_dicts.rs b/crates/codebook/tests/test_custom_dicts.rs index 17748c6..466fbaf 100644 --- a/crates/codebook/tests/test_custom_dicts.rs +++ b/crates/codebook/tests/test_custom_dicts.rs @@ -14,7 +14,7 @@ pub fn get_processor(enable_custom_dict: bool) -> Codebook { let custom_dict_path = example_file_path("custom_dict.txt"); let custom_dict = CustomDictionariesEntry { name: custom_dict_name.to_owned(), - path: custom_dict_path, + path: custom_dict_path.into(), ..Default::default() }; config.add_custom_dict(custom_dict); From 28b339674287efb70780eaec5de4a3e8e022209e Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Thu, 6 Nov 2025 17:42:56 +0200 Subject: [PATCH 13/14] chore: replace redundant static with const --- crates/codebook-config/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index fde91d6..e3e0808 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -15,9 +15,9 @@ use std::io::ErrorKind; use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock}; -static CACHE_DIR: &str = "codebook"; -static GLOBAL_CONFIG_FILE: &str = "codebook.toml"; -static USER_CONFIG_FILES: [&str; 2] = ["codebook.toml", ".codebook.toml"]; +const CACHE_DIR: &str = "codebook"; +const GLOBAL_CONFIG_FILE: &str = "codebook.toml"; +const USER_CONFIG_FILES: [&str; 2] = ["codebook.toml", ".codebook.toml"]; /// The main trait for Codebook configuration. pub trait CodebookConfig: Sync + Send + Debug { From 25adc48eff4fe0f13b845b387fdb0843960a08fb Mon Sep 17 00:00:00 2001 From: Mayrom Rabinovich Date: Fri, 7 Nov 2025 12:57:06 +0200 Subject: [PATCH 14/14] Revert "chore: convert path to PathBuf" This reverts commit e7724694ab35925898776b04e419cc20457d52cb. 
it had a bug with adding to relative dicts --- crates/codebook-config/src/lib.rs | 4 ++-- crates/codebook-config/src/settings.rs | 6 +++--- crates/codebook/tests/test_custom_dicts.rs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index e3e0808..bf8cd60 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -1108,13 +1108,13 @@ mod tests { let expected = vec![ CustomDictionariesEntry { name: "absolute".to_owned(), - path: absolute_custom_dict_path.clone(), + path: absolute_custom_dict_path.to_str().unwrap().to_string(), allow_add_words: true, config_file_path: Some(config_path.clone()), }, CustomDictionariesEntry { name: "relative".to_owned(), - path: relative_custom_dict_path.clone(), + path: relative_custom_dict_path.to_str().unwrap().to_string(), allow_add_words: false, config_file_path: Some(config_path.clone()), }, diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index d278562..ea81d99 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -14,7 +14,7 @@ pub struct CustomDictionariesEntry { /// An absolute or relative path to the custom dictionary #[serde(default)] - pub path: PathBuf, + pub path: String, /// Allow adding words to this dictionary #[serde(default)] @@ -31,9 +31,9 @@ impl CustomDictionariesEntry { config_file_path .parent() .ok_or(io::Error::from(io::ErrorKind::NotFound))? 
- .join(&self.path) + .join(Path::new(&self.path)) } else { - self.path.clone() + PathBuf::from(&self.path) }; path::absolute(&full_path) diff --git a/crates/codebook/tests/test_custom_dicts.rs b/crates/codebook/tests/test_custom_dicts.rs index 466fbaf..17748c6 100644 --- a/crates/codebook/tests/test_custom_dicts.rs +++ b/crates/codebook/tests/test_custom_dicts.rs @@ -14,7 +14,7 @@ pub fn get_processor(enable_custom_dict: bool) -> Codebook { let custom_dict_path = example_file_path("custom_dict.txt"); let custom_dict = CustomDictionariesEntry { name: custom_dict_name.to_owned(), - path: custom_dict_path.into(), + path: custom_dict_path, ..Default::default() }; config.add_custom_dict(custom_dict);