From bac85586ff322a456584f865a7cbff3a080f5268 Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Sat, 7 Feb 2026 12:44:27 +0900 Subject: [PATCH 1/2] feat(dict): record deleted phrases in chewing-deleted.dat --- NEWS | 3 ++ capi/src/io.rs | 4 +- src/dictionary/layered.rs | 54 +++++++++++++++++++-- src/dictionary/loader.rs | 25 +++++++++- src/dictionary/mod.rs | 24 ++++------ src/dictionary/sqlite.rs | 96 ++++++++++++++++++++++++-------------- src/dictionary/trie_buf.rs | 45 ++++++++++-------- src/editor/mod.rs | 11 ++++- tests/testhelper.c | 19 +++++++- 9 files changed, 201 insertions(+), 80 deletions(-) diff --git a/NEWS b/NEWS index e52d7979..0a78e80b 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,9 @@ What's New in libchewing (unreleased) * Features - dict: loading user dictionary are now also controlled by enabled_dicts in `chewing_new3()`. + - dict: deleted phrases now can be recorded in a separate chewing-deleted.dat + exclusion dictionary. This allows excluding phrases from even built-in + dictionaries. * Bug Fixes - dict: fixed parsing trie dictionary file with extension fields. diff --git a/capi/src/io.rs b/capi/src/io.rs index 96efd258..6ea33a34 100644 --- a/capi/src/io.rs +++ b/capi/src/io.rs @@ -134,7 +134,7 @@ pub unsafe extern "C" fn chewing_new2( chewing_new3( syspath, userpath, - c"word.dat,tsi.dat,chewing.dat".as_ptr(), + c"word.dat,tsi.dat,chewing.dat,chewing-deleted.dat".as_ptr(), logger, loggerdata, ) @@ -237,7 +237,7 @@ pub unsafe extern "C" fn chewing_new3( /// don't need to be freed. #[unsafe(no_mangle)] pub unsafe extern "C" fn chewing_get_defaultDictionaryNames() -> *const c_char { - c"word.dat,tsi.dat,chewing.dat".as_ptr() + c"word.dat,tsi.dat,chewing.dat,chewing-deleted.dat".as_ptr() } /// Releases the resources used by the given Chewing IM instance. 
diff --git a/src/dictionary/layered.rs b/src/dictionary/layered.rs index 4dc4fa2f..2f05caf2 100644 --- a/src/dictionary/layered.rs +++ b/src/dictionary/layered.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, btree_map::Entry}; +use std::collections::{BTreeMap, BTreeSet, btree_map::Entry}; use log::error; @@ -74,6 +74,21 @@ impl Layered { pub fn user_dict(&mut self) -> &mut dyn Dictionary { self.dicts[self.user_dict_index].as_mut() } + fn enabled_dicts(&self) -> impl Iterator> { + self.dicts + .iter() + .filter(|d| d.about().usage != DictionaryUsage::ExcludeList) + } + fn exclusion_dicts(&self) -> impl Iterator> { + self.dicts + .iter() + .filter(|d| d.about().usage == DictionaryUsage::ExcludeList) + } + fn exclusion_dicts_mut(&mut self) -> impl Iterator> { + self.dicts + .iter_mut() + .filter(|d| d.about().usage == DictionaryUsage::ExcludeList) + } } impl Dictionary for Layered { @@ -100,7 +115,7 @@ impl Dictionary for Layered { let mut sort_map: BTreeMap = BTreeMap::new(); let mut phrases: Vec = Vec::new(); - self.dicts.iter().for_each(|d| { + self.enabled_dicts().for_each(|d| { for phrase in d.lookup(syllables, strategy) { debug_assert!(!phrase.as_str().is_empty()); match sort_map.entry(phrase.to_string()) { @@ -122,14 +137,24 @@ impl Dictionary for Layered { } } }); + + // Remove excluded + let excluded: BTreeSet> = self + .exclusion_dicts() + .flat_map(|d| d.lookup(syllables, strategy)) + .map(|p| p.text) + .collect(); phrases + .into_iter() + .filter(|p| !excluded.contains(&p.text)) + .collect() } /// Returns all entries from all dictionaries. /// /// **NOTE**: Duplicate entries are not removed. 
fn entries(&self) -> Entries<'_> { - Box::new(self.dicts.iter().flat_map(|dict| dict.entries())) + Box::new(self.enabled_dicts().flat_map(|dict| dict.entries())) } fn about(&self) -> DictionaryInfo { @@ -146,10 +171,20 @@ impl Dictionary for Layered { fn set_usage(&mut self, _usage: DictionaryUsage) {} fn reopen(&mut self) -> Result<(), UpdateDictionaryError> { + self.exclusion_dicts_mut().for_each(|d| { + if let Err(error) = d.reopen() { + error!("Failed to reopen exclusion dictionary: {error}"); + } + }); self.user_dict().reopen() } fn flush(&mut self) -> Result<(), UpdateDictionaryError> { + self.exclusion_dicts_mut().for_each(|d| { + if let Err(error) = d.flush() { + error!("Failed to flush exclusion dictionary: {error}"); + } + }); self.user_dict().flush() } @@ -162,6 +197,13 @@ impl Dictionary for Layered { error!("BUG! added phrase is empty"); return Ok(()); } + self.exclusion_dicts_mut().for_each(|d| { + if let Err(error) = d.remove_phrase(syllables, &phrase.text) { + error!( + "Failed to remove {phrase} {syllables:?} from exclusion dictionary: {error}" + ); + } + }); self.user_dict().add_phrase(syllables, phrase) } @@ -185,7 +227,11 @@ impl Dictionary for Layered { syllables: &[Syllable], phrase_str: &str, ) -> Result<(), UpdateDictionaryError> { - // TODO use exclude list + self.exclusion_dicts_mut().for_each(|d| { + if let Err(error) = d.add_phrase(syllables, (phrase_str, 0).into()) { + error!("Failed to add {phrase_str} {syllables:?} to exclusion dictionary: {error}"); + } + }); self.user_dict().remove_phrase(syllables, phrase_str) } } diff --git a/src/dictionary/loader.rs b/src/dictionary/loader.rs index 161dfdff..849eb535 100644 --- a/src/dictionary/loader.rs +++ b/src/dictionary/loader.rs @@ -94,6 +94,9 @@ impl AssetLoader { "chewing.dat" => { dict.set_usage(DictionaryUsage::User); } + "chewing-deleted.dat" => { + dict.set_usage(DictionaryUsage::ExcludeList); + } _ => { dict.set_usage(DictionaryUsage::Unknown); } @@ -168,11 +171,12 @@ impl 
UserDictionaryManager { /// /// If no user dictionary were found, a new dictionary will be created at /// the default path. - pub fn init(self) -> io::Result> { + pub fn init(&self) -> io::Result> { let mut loader = SingleDictionaryLoader::new(); loader.migrate_sqlite(true); let data_path = self .data_path + .clone() .or_else(userphrase_path) .ok_or(io::Error::from(io::ErrorKind::NotFound))?; if data_path.ends_with(UD_MEM_FILE_NAME) { @@ -246,6 +250,25 @@ impl UserDictionaryManager { fresh_dict.set_usage(DictionaryUsage::User); Ok(fresh_dict) } + /// Searches and initializes the user exclusion dictionary. + /// + /// If no user exclusion dictionary were found, a new dictionary + /// will be created at the default path. + pub fn init_deleted(&self) -> io::Result> { + let loader = SingleDictionaryLoader::new(); + let data_path = self + .data_path + .clone() + .or_else(userphrase_path) + .ok_or(io::Error::from(io::ErrorKind::NotFound))?; + let userdata_dir = data_path.parent().expect("path should contain a filename"); + if !userdata_dir.exists() { + info!("Creating userdata_dir: {}", userdata_dir.display()); + fs::create_dir_all(&userdata_dir)?; + } + let exclude_dict_path = userdata_dir.join("chewing-deleted.dat"); + Ok(loader.guess_format_and_load(&exclude_dict_path)?) + } /// Load a in-memory user dictionary. 
pub fn in_memory() -> Box { info!("Use in memory trie dictionary"); diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs index be919cfc..2f347cdb 100644 --- a/src/dictionary/mod.rs +++ b/src/dictionary/mod.rs @@ -35,26 +35,22 @@ mod usage; #[derive(Debug)] pub struct UpdateDictionaryError { /// TODO: doc + message: &'static str, source: Option>, } impl UpdateDictionaryError { - pub(crate) fn new() -> UpdateDictionaryError { - UpdateDictionaryError { source: None } - } -} - -impl From for UpdateDictionaryError { - fn from(value: io::Error) -> Self { + pub(crate) fn new(message: &'static str) -> UpdateDictionaryError { UpdateDictionaryError { - source: Some(Box::new(value)), + message, + source: None, } } } impl Display for UpdateDictionaryError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "update dictionary failed") + write!(f, "update dictionary failed: {}", self.message) } } @@ -358,14 +354,14 @@ pub trait Dictionary: Debug { /// It should not fail if the dictionary is read-only or able to sync across /// processes automatically. fn reopen(&mut self) -> Result<(), UpdateDictionaryError> { - Err(UpdateDictionaryError { source: None }) + Err(UpdateDictionaryError::new("unimplemented")) } /// Flushes all the changes back to the filesystem /// /// The change made to the dictionary might not be persisted without /// calling this method. fn flush(&mut self) -> Result<(), UpdateDictionaryError> { - Err(UpdateDictionaryError { source: None }) + Err(UpdateDictionaryError::new("unimplemented")) } /// An method for updating dictionaries. 
/// @@ -390,7 +386,7 @@ pub trait Dictionary: Debug { _syllables: &[Syllable], _phrase: Phrase, ) -> Result<(), UpdateDictionaryError> { - Err(UpdateDictionaryError { source: None }) + Err(UpdateDictionaryError::new("unimplemented")) } /// TODO: doc fn update_phrase( @@ -400,7 +396,7 @@ pub trait Dictionary: Debug { _user_freq: u32, _time: u64, ) -> Result<(), UpdateDictionaryError> { - Err(UpdateDictionaryError { source: None }) + Err(UpdateDictionaryError::new("unimplemented")) } /// TODO: doc fn remove_phrase( @@ -408,7 +404,7 @@ pub trait Dictionary: Debug { _syllables: &[Syllable], _phrase_str: &str, ) -> Result<(), UpdateDictionaryError> { - Err(UpdateDictionaryError { source: None }) + Err(UpdateDictionaryError::new("unimplemented")) } } diff --git a/src/dictionary/sqlite.rs b/src/dictionary/sqlite.rs index 59beb9e2..72d68e20 100644 --- a/src/dictionary/sqlite.rs +++ b/src/dictionary/sqlite.rs @@ -326,14 +326,6 @@ impl SqliteDictionary { } } -impl From for UpdateDictionaryError { - fn from(source: RusqliteError) -> Self { - UpdateDictionaryError { - source: Some(source.into()), - } - } -} - impl Dictionary for SqliteDictionary { fn lookup(&self, syllables: &[Syllable], strategy: LookupStrategy) -> Vec { let _ = strategy; @@ -413,12 +405,19 @@ impl Dictionary for SqliteDictionary { } fn flush(&mut self) -> Result<(), UpdateDictionaryError> { + let make_error = |e| UpdateDictionaryError { + message: "flush sqlite failed", + source: Some(Box::new(e)), + }; if self.readonly { return Err(UpdateDictionaryError { - source: Some(Box::new(SqliteDictionaryError::ReadOnly)), + message: "sqlite dictionary is readonly", + source: None, }); } - self.conn.pragma_update(None, "wal_checkpoint", "PASSIVE")?; + self.conn + .pragma_update(None, "wal_checkpoint", "PASSIVE") + .map_err(make_error)?; Ok(()) } @@ -427,20 +426,29 @@ impl Dictionary for SqliteDictionary { syllables: &[Syllable], phrase: Phrase, ) -> Result<(), UpdateDictionaryError> { + let make_error = |e| 
UpdateDictionaryError { + message: "add phrase to sqlite failed", + source: Some(Box::new(e)), + }; if self.readonly { return Err(UpdateDictionaryError { - source: Some(Box::new(SqliteDictionaryError::ReadOnly)), + message: "sqlite dictionary is readonly", + source: None, }); } let syllables_bytes = syllables.to_bytes(); - let mut stmt = self.conn.prepare_cached( - "INSERT OR REPLACE INTO dictionary_v1 ( + let mut stmt = self + .conn + .prepare_cached( + "INSERT OR REPLACE INTO dictionary_v1 ( syllables, phrase, freq ) VALUES (?, ?, ?)", - )?; - stmt.execute(params![syllables_bytes, phrase.as_str(), phrase.freq()])?; + ) + .map_err(make_error)?; + stmt.execute(params![syllables_bytes, phrase.as_str(), phrase.freq()]) + .map_err(make_error)?; Ok(()) } @@ -451,52 +459,64 @@ impl Dictionary for SqliteDictionary { user_freq: u32, time: u64, ) -> Result<(), UpdateDictionaryError> { + let make_error = |e| UpdateDictionaryError { + message: "update phrase in sqlite failed", + source: Some(Box::new(e)), + }; // sqlite only supports i64 let time: i64 = time.clamp(0, i64::MAX as u64) as i64; if self.readonly { return Err(UpdateDictionaryError { - source: Some(Box::new(SqliteDictionaryError::ReadOnly)), + message: "sqlite dictionary is readonly", + source: None, }); } let syllables_bytes = syllables.to_bytes(); - let tx = self.conn.transaction()?; + let tx = self.conn.transaction().map_err(make_error)?; { - let mut stmt = tx.prepare_cached( - "SELECT userphrase_id FROM dictionary_v1 WHERE syllables = ? AND phrase = ?", - )?; + let mut stmt = tx + .prepare_cached( + "SELECT userphrase_id FROM dictionary_v1 WHERE syllables = ? AND phrase = ?", + ) + .map_err(make_error)?; let userphrase_id: Option> = stmt .query_row(params![syllables_bytes, phrase.as_str()], |row| row.get(0)) - .optional()?; + .optional() + .map_err(make_error)?; match userphrase_id { Some(Some(id)) => { - let mut stmt = - tx.prepare_cached("UPDATE userphrase_v2 SET user_freq = ?
WHERE id = ?")?; - stmt.execute(params![user_freq, id])?; + let mut stmt = tx + .prepare_cached("UPDATE userphrase_v2 SET user_freq = ? WHERE id = ?") + .map_err(make_error)?; + stmt.execute(params![user_freq, id]).map_err(make_error)?; } Some(None) | None => { - let mut stmt = tx.prepare_cached( - "INSERT INTO userphrase_v2 (user_freq, time) VALUES (?, ?)", - )?; - stmt.execute(params![user_freq, time])?; + let mut stmt = tx + .prepare_cached("INSERT INTO userphrase_v2 (user_freq, time) VALUES (?, ?)") + .map_err(make_error)?; + stmt.execute(params![user_freq, time]).map_err(make_error)?; let userphrase_id = tx.last_insert_rowid(); - let mut stmt = tx.prepare_cached( - "INSERT OR REPLACE INTO dictionary_v1 ( + let mut stmt = tx + .prepare_cached( + "INSERT OR REPLACE INTO dictionary_v1 ( syllables, phrase, freq, userphrase_id ) VALUES (?, ?, ?, ?)", - )?; + ) + .map_err(make_error)?; stmt.execute(params![ syllables_bytes, phrase.as_str(), phrase.freq(), userphrase_id - ])?; + ]) + .map_err(make_error)?; } } } - tx.commit()?; + tx.commit().map_err(make_error)?; Ok(()) } @@ -505,11 +525,17 @@ impl Dictionary for SqliteDictionary { syllables: &[Syllable], phrase_str: &str, ) -> Result<(), UpdateDictionaryError> { + let make_error = |e| UpdateDictionaryError { + message: "remove phrase from sqlite failed", + source: Some(Box::new(e)), + }; let syllables_bytes = syllables.to_bytes(); let mut stmt = self .conn - .prepare_cached("DELETE FROM dictionary_v1 WHERE syllables = ? AND phrase = ?")?; - stmt.execute(params![syllables_bytes, phrase_str])?; + .prepare_cached("DELETE FROM dictionary_v1 WHERE syllables = ?
AND phrase = ?") + .map_err(make_error)?; + stmt.execute(params![syllables_bytes, phrase_str]) + .map_err(make_error)?; Ok(()) } } diff --git a/src/dictionary/trie_buf.rs b/src/dictionary/trie_buf.rs index 2a9c4f0e..73dac10a 100644 --- a/src/dictionary/trie_buf.rs +++ b/src/dictionary/trie_buf.rs @@ -7,11 +7,11 @@ use std::{ thread::{self, JoinHandle}, }; -use log::{debug, error, info}; +use log::{debug, error, info, warn}; use super::{ - BuildDictionaryError, Dictionary, DictionaryBuilder, DictionaryInfo, DictionaryUsage, Entries, - LookupStrategy, Phrase, Trie, TrieBuilder, UpdateDictionaryError, + Dictionary, DictionaryBuilder, DictionaryInfo, DictionaryUsage, Entries, LookupStrategy, + Phrase, Trie, TrieBuilder, UpdateDictionaryError, }; use crate::zhuyin::Syllable; @@ -164,7 +164,8 @@ impl TrieBuf { .entries_iter_for(syllables, LookupStrategy::Standard) .any(|ph| ph.as_str() == phrase.as_str()) { - return Err(UpdateDictionaryError { source: None }); + warn!("phrase {} {syllables:?} already exists", phrase.text); + return Ok(()); } debug!("added phrase {} {syllables:?}", phrase.text); @@ -219,6 +220,10 @@ impl TrieBuf { pub(crate) fn sync(&mut self) -> Result<(), UpdateDictionaryError> { info!("Synchronize dictionary from disk..."); + let make_error = |e| UpdateDictionaryError { + message: "synchronize dictionary from disk failed", + source: Some(Box::new(e)), + }; if let Some(join_handle) = self.join_handle.take() { if !join_handle.is_finished() { info!("Aborted.
Wait until previous sync is finished."); @@ -228,7 +233,7 @@ impl TrieBuf { match join_handle.join() { Ok(Ok(())) => { info!("Reloading..."); - self.trie = Some(Trie::open(self.path().unwrap())?); + self.trie = Some(Trie::open(self.path().unwrap()).map_err(make_error)?); if !self.dirty { self.btree.clear(); self.graveyard.clear(); @@ -245,13 +250,17 @@ impl TrieBuf { // TODO: reduce reading if self.path().is_some() { info!("Reloading..."); - self.trie = Some(Trie::open(self.path().unwrap())?); + self.trie = Some(Trie::open(self.path().unwrap()).map_err(make_error)?); } } Ok(()) } pub(crate) fn checkpoint(&mut self) { + let make_error = |e| UpdateDictionaryError { + message: "failed to save snapshot", + source: Some(Box::new(e)), + }; info!("Check pointing..."); if self.join_handle.is_some() { info!("Aborted. Wait until previous checkpoint result is handled."); @@ -271,15 +280,19 @@ impl TrieBuf { self.join_handle = Some(thread::spawn(move || { let mut builder = TrieBuilder::new(); info!("Saving snapshot..."); - builder.set_info(DictionaryInfo { - software: software_version(), - ..snapshot.about() - })?; + builder + .set_info(DictionaryInfo { + software: software_version(), + ..snapshot.about() + }) + .map_err(make_error)?; for (syllables, phrase) in snapshot.entries() { - builder.insert(&syllables, phrase)?; + builder.insert(&syllables, phrase).map_err(make_error)?; } info!("Flushing snapshot..."); - builder.build(snapshot.path().unwrap())?; + builder + .build(snapshot.path().unwrap()) + .map_err(make_error)?; info!(" Done"); Ok(()) })); @@ -287,14 +300,6 @@ impl TrieBuf { } } -impl From for UpdateDictionaryError { - fn from(value: BuildDictionaryError) -> Self { - UpdateDictionaryError { - source: Some(Box::new(value)), - } - } -} - impl Dictionary for TrieBuf { fn lookup(&self, syllables: &[Syllable], strategy: LookupStrategy) -> Vec { TrieBuf::lookup(self, syllables, strategy) diff --git a/src/editor/mod.rs b/src/editor/mod.rs index 67cdb54f..e5b4b240 100644 --- 
a/src/editor/mod.rs +++ b/src/editor/mod.rs @@ -221,6 +221,11 @@ impl Editor { .ok(); if custom_userpath { user_dict } else { None } }; + if enabled_dicts.iter().any(|d| d == "chewing-deleted.dat") { + if let Err(error) = user_dict_mgr.init_deleted() { + error!("Failed to load user exclusion dict: {error}"); + } + } let mut loader = AssetLoader::new(); if let Some(syspath) = search_path { loader = loader.search_path(syspath); @@ -620,7 +625,7 @@ impl SharedState { } Err(msg) => { msg.clone_into(&mut self.notice_buffer); - Err(UpdateDictionaryError::new()) + Err(UpdateDictionaryError::new("failed to learn new phrase")) } } } @@ -677,7 +682,9 @@ impl SharedState { &phrase, phrase.chars().count() ); - return Err(UpdateDictionaryError::new()); + return Err(UpdateDictionaryError::new( + "failed to learn phrase: syllables and phrase has different length", + )); } let phrases = self.dict.lookup(syllables, LookupStrategy::Standard); if phrases.is_empty() { diff --git a/tests/testhelper.c b/tests/testhelper.c index 21ec56e2..74013eb6 100644 --- a/tests/testhelper.c +++ b/tests/testhelper.c @@ -415,10 +415,25 @@ char *get_test_userphrase_path() return TEST_HASH_DIR "/" DB_NAME; } +char *get_test_user_deleted_path() +{ + char *path = getenv("TEST_USER_DELETED_PATH"); + + if (path) + return path; + else + return TEST_HASH_DIR "/" "chewing-deleted.dat"; +} + void clean_userphrase() { - char *userphrase_path = get_test_userphrase_path(); + char *path = get_test_userphrase_path(); + + if (remove(path) != 0 && errno != ENOENT) + fprintf(stderr, "remove fails at %s:%d\n", __FILE__, __LINE__); + + path = get_test_user_deleted_path(); - if (remove(userphrase_path) != 0 && errno != ENOENT) + if (remove(path) != 0 && errno != ENOENT) fprintf(stderr, "remove fails at %s:%d\n", __FILE__, __LINE__); } From b5b9862860088f362221d28594dbceb83200bf3a Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Sat, 7 Feb 2026 17:09:59 +0900 Subject: [PATCH 2/2] feat(dict): stop auto learning deleted 
phrases --- CMakeLists.txt | 1 + NEWS | 2 +- src/dictionary/layered.rs | 7 ++++ src/dictionary/trie.rs | 42 +++---------------- src/dictionary/trie_buf.rs | 49 +++++++++++++++++++--- src/editor/mod.rs | 4 ++ tests/test-bopomofo.c | 6 +++ tests/test-userphrase.c | 85 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 153 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59e012d8..1305d715 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.24.0...3.31.6) project(libchewing LANGUAGES C) set(CMAKE_PROJECT_VERSION 0.11.0) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) find_package(Git) if(Git_FOUND) diff --git a/NEWS b/NEWS index 0a78e80b..c806e27e 100644 --- a/NEWS +++ b/NEWS @@ -6,7 +6,7 @@ What's New in libchewing (unreleased) in `chewing_new3()`. - dict: deleted phrases now can be recorded in a separate chewing-deleted.dat exclusion dictionary. This allows excluding phrases from even built-in - dictionaries. + dictionaries. Deleted phrases will not be auto learned again. * Bug Fixes - dict: fixed parsing trie dictionary file with extension fields. 
diff --git a/src/dictionary/layered.rs b/src/dictionary/layered.rs index 2f05caf2..3c925f35 100644 --- a/src/dictionary/layered.rs +++ b/src/dictionary/layered.rs @@ -89,6 +89,13 @@ impl Layered { .iter_mut() .filter(|d| d.about().usage == DictionaryUsage::ExcludeList) } + pub(crate) fn is_excluded(&self, syllables: &[Syllable], phrase: &str) -> bool { + self.exclusion_dicts().any(|d| { + d.lookup(syllables, LookupStrategy::Standard) + .iter() + .any(|p| p.text.as_ref() == phrase) + }) + } } impl Dictionary for Layered { diff --git a/src/dictionary/trie.rs b/src/dictionary/trie.rs index 0dcbc6d1..5ad2d7ae 100644 --- a/src/dictionary/trie.rs +++ b/src/dictionary/trie.rs @@ -1,5 +1,4 @@ use std::{ - cell::Cell, cmp::Ordering, collections::VecDeque, error::Error, @@ -9,7 +8,6 @@ use std::{ iter, num::NonZeroUsize, path::{Path, PathBuf}, - time::SystemTime, }; use der::{ @@ -17,7 +15,7 @@ use der::{ SliceReader, Tag, TagMode, TagNumber, Tagged, Writer, asn1::{ContextSpecificRef, OctetStringRef, Utf8StringRef}, }; -use log::{error, warn}; +use log::{debug, error}; use super::{ BuildDictionaryError, Dictionary, DictionaryBuilder, DictionaryInfo, Entries, LookupStrategy, @@ -266,7 +264,6 @@ impl Dictionary for Trie { // Return early for empty dictionary if root.child_begin() == root.child_end() { - warn!("[!] detected empty dictionary."); return vec![]; } @@ -1190,44 +1187,17 @@ impl DictionaryBuilder for TrieBuilder { self.write(&mut writer)?; writer.flush()?; database.sync_data()?; + debug!("rename from {} to {}", tmpname.display(), path.display()); fs::rename(&tmpname, path)?; Ok(()) } } -// xoshiro256** PRNG -// -// Ref: fn rand() -> u64 { - thread_local! 
{ - static PRNG_STATE: Cell<[u64; 4]> = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .map(|du| { - Cell::new([ - du.as_secs(), - du.subsec_millis() as u64, - du.subsec_micros() as u64, - du.subsec_nanos() as u64, - ]) - }) - .unwrap_or_default(); - } - fn rol64(x: u64, k: u32) -> u64 { - x.wrapping_shl(k) | x.wrapping_shr(64 - k) - } - PRNG_STATE.with(|state| { - let mut s = state.get(); - let result = rol64(s[1].wrapping_mul(5), 7).wrapping_mul(9); - let t = s[1].wrapping_shl(17); - s[2] ^= s[0]; - s[3] ^= s[1]; - s[1] ^= s[2]; - s[0] ^= s[3]; - s[2] ^= t; - s[3] = rol64(s[3], 45); - state.set(s); - result - }) + use std::collections::hash_map::RandomState; + use std::hash::BuildHasher; + use std::hash::Hasher; + RandomState::new().build_hasher().finish() } impl Default for TrieBuilder { diff --git a/src/dictionary/trie_buf.rs b/src/dictionary/trie_buf.rs index 73dac10a..3eaf66c6 100644 --- a/src/dictionary/trie_buf.rs +++ b/src/dictionary/trie_buf.rs @@ -2,6 +2,7 @@ use std::{ borrow::Cow, cmp, collections::{BTreeMap, BTreeSet, btree_map::Entry}, + error::Error, io, path::{Path, PathBuf}, thread::{self, JoinHandle}, @@ -23,6 +24,8 @@ pub struct TrieBuf { graveyard: BTreeSet, join_handle: Option>>, dirty: bool, + // TODO: currently usage is not saved in file + usage: DictionaryUsage, } type PhraseKey = (Cow<'static, [Syllable]>, Cow<'static, str>); @@ -62,6 +65,7 @@ impl TrieBuf { graveyard: BTreeSet::new(), join_handle: None, dirty: false, + usage: DictionaryUsage::Unknown, }) } @@ -73,6 +77,7 @@ impl TrieBuf { graveyard: BTreeSet::new(), join_handle: None, dirty: false, + usage: DictionaryUsage::Unknown, } } @@ -218,6 +223,25 @@ impl TrieBuf { Ok(()) } + pub(crate) fn wait(&mut self) { + if let Some(join_handle) = self.join_handle.take() { + match join_handle.join() { + Ok(Err(error)) => { + error!("flushing dictionary failed: {error}"); + let mut error = &error as &(dyn Error + 'static); + while let Some(source) = error.source() { + error = 
source; + error!("|-> {error}"); + } + } + Err(error) => { + error!("flushing dictionary thread panicked: {error:?}"); + } + _ => {} + } + } + } + pub(crate) fn sync(&mut self) -> Result<(), UpdateDictionaryError> { info!("Synchronize dictionary from disk..."); let make_error = |e| UpdateDictionaryError { @@ -233,7 +257,9 @@ impl TrieBuf { match join_handle.join() { Ok(Ok(())) => { info!("Reloading..."); - self.trie = Some(Trie::open(self.path().unwrap()).map_err(make_error)?); + let mut trie = Trie::open(self.path().unwrap()).map_err(make_error)?; + trie.set_usage(self.usage); + self.trie = Some(trie); if !self.dirty { self.btree.clear(); self.graveyard.clear(); @@ -241,6 +267,11 @@ impl TrieBuf { } Ok(Err(e)) => { error!("Failed to flush dictionary due to error: {e}"); + let mut error = &e as &(dyn Error + 'static); + while let Some(source) = error.source() { + error = source; + error!("|-> {error}"); + } } Err(_) => { error!("Failed to join thread."); @@ -250,7 +281,9 @@ impl TrieBuf { // TODO: reduce reading if self.path().is_some() { info!("Reloading..."); - self.trie = Some(Trie::open(self.path().unwrap()).map_err(make_error)?); + let mut trie = Trie::open(self.path().unwrap()).map_err(make_error)?; + trie.set_usage(self.usage); + self.trie = Some(trie); } } Ok(()) @@ -276,6 +309,7 @@ impl TrieBuf { graveyard: self.graveyard.clone(), join_handle: None, dirty: false, + usage: self.usage, }; self.join_handle = Some(thread::spawn(move || { let mut builder = TrieBuilder::new(); @@ -289,7 +323,10 @@ impl TrieBuf { for (syllables, phrase) in snapshot.entries() { builder.insert(&syllables, phrase).map_err(make_error)?; } - info!("Flushing snapshot..."); + info!( + "Flushing snapshot to {}...", + snapshot.path().unwrap().display() + ); builder .build(snapshot.path().unwrap()) .map_err(make_error)?; @@ -320,6 +357,7 @@ impl Dictionary for TrieBuf { } fn set_usage(&mut self, usage: DictionaryUsage) { + self.usage = usage; if let Some(trie) = self.trie.as_mut() { 
trie.set_usage(usage); } @@ -376,11 +414,10 @@ impl, const N: usize> From<[(Vec, Vec

); N]> for Tri impl Drop for TrieBuf { fn drop(&mut self) { + self.wait(); let _ = self.sync(); let _ = self.flush(); - if let Some(join_handle) = self.join_handle.take() { - let _ = join_handle.join(); - } + self.wait(); } } diff --git a/src/editor/mod.rs b/src/editor/mod.rs index e5b4b240..8086d92a 100644 --- a/src/editor/mod.rs +++ b/src/editor/mod.rs @@ -776,6 +776,10 @@ impl SharedState { } fn auto_learn(&mut self, intervals: &[Interval]) { for (syllables, phrase) in collect_new_phrases(intervals, self.com.symbols()) { + if self.dict.is_excluded(&syllables, &phrase) { + debug!("skip autolearn excluded phrase {phrase} {syllables:?}"); + continue; + } if let Err(error) = self.learn_phrase(&syllables, &phrase) { error!("Failed to learn phrase {phrase} from {syllables:?}: {error:#}"); } diff --git a/tests/test-bopomofo.c b/tests/test-bopomofo.c index 79d65210..02f7ea03 100644 --- a/tests/test-bopomofo.c +++ b/tests/test-bopomofo.c @@ -2202,6 +2202,8 @@ void test_KB_ET26_choice_append() void test_KB_DACHEN_CP26() { + clean_userphrase(); + ChewingContext *ctx; ctx = chewing_new(); @@ -2277,6 +2279,8 @@ void test_KB_DACHEN_CP26() void test_KB_GIN_YIEH() { + clean_userphrase(); + ChewingContext *ctx; ctx = chewing_new(); start_testcase(ctx); @@ -2296,6 +2300,8 @@ void test_KB_GIN_YIEH() void test_KB_IBM() { + clean_userphrase(); + ChewingContext *ctx; ctx = chewing_new(); start_testcase(ctx); diff --git a/tests/test-userphrase.c b/tests/test-userphrase.c index aa0a2c57..bf46fc66 100644 --- a/tests/test-userphrase.c +++ b/tests/test-userphrase.c @@ -420,12 +420,55 @@ void test_userphrase_auto_learn_only_after_commit() chewing_delete(ctx); } +void test_userphrase_auto_learn_skip_excluded() +{ + ChewingContext *ctx = NULL; + char p[] = "下雪"; + char b[] = "ㄒㄧㄚˋ ㄒㄩㄝˇ"; + + static const char *CAND_1[] = { + "下雪" + }; + static const char *CAND_2[] = { + "雪", "鱈" + }; + int ret = 0; + + clean_userphrase(); + + start_testcase(ctx); + + ctx = chewing_new(); + 
chewing_set_phraseChoiceRearward(ctx, 1); + + type_keystroke_by_string(ctx, "vu84vm,3"); + type_keystroke_by_string(ctx, "vu84vm,3"); + ok_candidate(ctx, CAND_1, ARRAY_SIZE(CAND_1)); + ret = chewing_userphrase_remove(ctx, p, b); + ok(ret == 1, "chewing_userphrase_remove() return value `%d' shall be `%d'", ret, 1); + ret = chewing_userphrase_lookup(ctx, p, b); + ok(ret == 0, "chewing_userphrase_lookup() return value `%d' shall be `%d'", ret, 0); + chewing_delete(ctx); + ctx = NULL; + + ctx = chewing_new(); + type_keystroke_by_string(ctx, "vu84vm,3"); + ret = chewing_userphrase_lookup(ctx, p, b); + ok(ret == 0, "chewing_userphrase_lookup() return value `%d' shall be `%d'", ret, 0); + type_keystroke_by_string(ctx, "vu84vm,3"); + ok_candidate(ctx, CAND_2, ARRAY_SIZE(CAND_2)); + + chewing_delete(ctx); + ctx = NULL; +} + void test_userphrase_autolearn() { test_userphrase_auto_learn(); test_userphrase_auto_learn_with_symbol(); test_userphrase_auto_learn_hardcode_break(); test_userphrase_auto_learn_only_after_commit(); + test_userphrase_auto_learn_skip_excluded(); } void test_userphrase_enumerate_normal() @@ -914,6 +957,47 @@ void test_userphrase_remove() ctx = NULL; } +void test_userphrase_remove_builtin() +{ + ChewingContext *ctx = NULL; + char p[] = "下雪"; + char b[] = "ㄒㄧㄚˋ ㄒㄩㄝˇ"; + + static const char *CAND_1[] = { + "下雪" + }; + static const char *CAND_2[] = { + "雪", "鱈" + }; + int ret = 0; + + clean_userphrase(); + + start_testcase(ctx); + + ctx = chewing_new(); + chewing_set_phraseChoiceRearward(ctx, 1); + + type_keystroke_by_string(ctx, "vu84vm,3"); + type_keystroke_by_string(ctx, "vu84vm,3"); + ok_candidate(ctx, CAND_1, ARRAY_SIZE(CAND_1)); + ret = chewing_userphrase_remove(ctx, p, b); + ok(ret == 1, "chewing_userphrase_remove() return value `%d' shall be `%d'", ret, 1); + ret = chewing_userphrase_lookup(ctx, p, b); + ok(ret == 0, "chewing_userphrase_lookup() return value `%d' shall be `%d'", ret, 0); + chewing_delete(ctx); + ctx = NULL; + + ctx = chewing_new(); + 
ret = chewing_userphrase_lookup(ctx, p, b); + ok(ret == 0, "chewing_userphrase_lookup() return value `%d' shall be `%d'", ret, 0); + type_keystroke_by_string(ctx, "vu84vm,3"); + ok_candidate(ctx, CAND_2, ARRAY_SIZE(CAND_2)); + + chewing_delete(ctx); + ctx = NULL; +} + int main(int argc, char *argv[]) { putenv("CHEWING_PATH=" CHEWING_DATA_PREFIX); @@ -928,6 +1012,7 @@ int main(int argc, char *argv[]) test_userphrase_lookup(); test_userphrase_double_free(); test_userphrase_remove(); + test_userphrase_remove_builtin(); return exit_status(); }