Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.24.0...3.31.6)
project(libchewing LANGUAGES C)

set(CMAKE_PROJECT_VERSION 0.11.0)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

find_package(Git)
if(Git_FOUND)
Expand Down
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ What's New in libchewing (unreleased)
* Features
- dict: loading of the user dictionary is now also controlled by enabled_dicts
in `chewing_new3()`.
- dict: deleted phrases can now be recorded in a separate chewing-deleted.dat
exclusion dictionary. This allows excluding phrases even from built-in
dictionaries. Deleted phrases will not be auto-learned again.

* Bug Fixes
- dict: fixed parsing trie dictionary file with extension fields.
Expand Down
4 changes: 2 additions & 2 deletions capi/src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ pub unsafe extern "C" fn chewing_new2(
chewing_new3(
syspath,
userpath,
c"word.dat,tsi.dat,chewing.dat".as_ptr(),
c"word.dat,tsi.dat,chewing.dat,chewing-deleted.dat".as_ptr(),
logger,
loggerdata,
)
Expand Down Expand Up @@ -237,7 +237,7 @@ pub unsafe extern "C" fn chewing_new3(
/// don't need to be freed.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn chewing_get_defaultDictionaryNames() -> *const c_char {
c"word.dat,tsi.dat,chewing.dat".as_ptr()
c"word.dat,tsi.dat,chewing.dat,chewing-deleted.dat".as_ptr()
}

/// Releases the resources used by the given Chewing IM instance.
Expand Down
61 changes: 57 additions & 4 deletions src/dictionary/layered.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::{BTreeMap, btree_map::Entry};
use std::collections::{BTreeMap, BTreeSet, btree_map::Entry};

use log::error;

Expand Down Expand Up @@ -74,6 +74,28 @@ impl Layered {
pub fn user_dict(&mut self) -> &mut dyn Dictionary {
    // The user dictionary is the layer stored at `user_dict_index`.
    let index = self.user_dict_index;
    self.dicts[index].as_mut()
}
/// Iterates over every layer whose usage is not
/// `DictionaryUsage::ExcludeList`.
fn enabled_dicts(&self) -> impl Iterator<Item = &Box<dyn Dictionary>> {
    self.dicts
        .iter()
        .filter(|layer| !matches!(layer.about().usage, DictionaryUsage::ExcludeList))
}
/// Iterates over every layer whose usage is
/// `DictionaryUsage::ExcludeList`.
fn exclusion_dicts(&self) -> impl Iterator<Item = &Box<dyn Dictionary>> {
    self.dicts
        .iter()
        .filter(|layer| matches!(layer.about().usage, DictionaryUsage::ExcludeList))
}
/// Mutable counterpart of `exclusion_dicts`: iterates over every layer
/// whose usage is `DictionaryUsage::ExcludeList`.
fn exclusion_dicts_mut(&mut self) -> impl Iterator<Item = &mut Box<dyn Dictionary>> {
    self.dicts
        .iter_mut()
        .filter(|layer| matches!(layer.about().usage, DictionaryUsage::ExcludeList))
}
/// Reports whether `phrase` is listed under `syllables` in any exclusion
/// dictionary.
///
/// Each exclusion dictionary is queried with `LookupStrategy::Standard`
/// and the phrase text is compared for exact equality.
pub(crate) fn is_excluded(&self, syllables: &[Syllable], phrase: &str) -> bool {
    for dict in self.exclusion_dicts() {
        let hits = dict.lookup(syllables, LookupStrategy::Standard);
        if hits.iter().any(|hit| hit.text.as_ref() == phrase) {
            return true;
        }
    }
    false
}
}

impl Dictionary for Layered {
Expand All @@ -100,7 +122,7 @@ impl Dictionary for Layered {
let mut sort_map: BTreeMap<String, usize> = BTreeMap::new();
let mut phrases: Vec<Phrase> = Vec::new();

self.dicts.iter().for_each(|d| {
self.enabled_dicts().for_each(|d| {
for phrase in d.lookup(syllables, strategy) {
debug_assert!(!phrase.as_str().is_empty());
match sort_map.entry(phrase.to_string()) {
Expand All @@ -122,14 +144,24 @@ impl Dictionary for Layered {
}
}
});

// Remove excluded
let excluded: BTreeSet<Box<str>> = self
.exclusion_dicts()
.flat_map(|d| d.lookup(syllables, strategy))
.map(|p| p.text)
.collect();
phrases
.into_iter()
.filter(|p| !excluded.contains(&p.text))
.collect()
}

/// Returns all entries from all dictionaries, except exclusion dictionaries.
///
/// **NOTE**: Duplicate entries are not removed.
fn entries(&self) -> Entries<'_> {
Box::new(self.dicts.iter().flat_map(|dict| dict.entries()))
Box::new(self.enabled_dicts().flat_map(|dict| dict.entries()))
}

fn about(&self) -> DictionaryInfo {
Expand All @@ -146,10 +178,20 @@ impl Dictionary for Layered {
fn set_usage(&mut self, _usage: DictionaryUsage) {}

fn reopen(&mut self) -> Result<(), UpdateDictionaryError> {
    // Exclusion dictionaries are reopened on a best-effort basis: a
    // failure is logged and does not affect the returned result.
    for dict in self.exclusion_dicts_mut() {
        if let Err(error) = dict.reopen() {
            error!("Failed to reopen exclusion dictionary: {error}");
        }
    }
    // Only the user dictionary's outcome is reported to the caller.
    self.user_dict().reopen()
}

fn flush(&mut self) -> Result<(), UpdateDictionaryError> {
    // Exclusion dictionaries are flushed on a best-effort basis: a
    // failure is logged and does not affect the returned result.
    for dict in self.exclusion_dicts_mut() {
        if let Err(error) = dict.flush() {
            error!("Failed to flush exclusion dictionary: {error}");
        }
    }
    // Only the user dictionary's outcome is reported to the caller.
    self.user_dict().flush()
}

Expand All @@ -162,6 +204,13 @@ impl Dictionary for Layered {
error!("BUG! added phrase is empty");
return Ok(());
}
self.exclusion_dicts_mut().for_each(|d| {
if let Err(error) = d.remove_phrase(syllables, &phrase.text) {
error!(
"Failed to remove {phrase} {syllables:?} from exclusion dictionary: {error}"
);
}
});
self.user_dict().add_phrase(syllables, phrase)
}

Expand All @@ -185,7 +234,11 @@ impl Dictionary for Layered {
syllables: &[Syllable],
phrase_str: &str,
) -> Result<(), UpdateDictionaryError> {
// TODO use exclude list
self.exclusion_dicts_mut().for_each(|d| {
if let Err(error) = d.add_phrase(syllables, (phrase_str, 0).into()) {
error!("Failed to add {phrase_str} {syllables:?} to exclusion dictionary: {error}");
}
});
self.user_dict().remove_phrase(syllables, phrase_str)
}
}
Expand Down
25 changes: 24 additions & 1 deletion src/dictionary/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ impl AssetLoader {
"chewing.dat" => {
dict.set_usage(DictionaryUsage::User);
}
"chewing-deleted.dat" => {
dict.set_usage(DictionaryUsage::ExcludeList);
}
_ => {
dict.set_usage(DictionaryUsage::Unknown);
}
Expand Down Expand Up @@ -168,11 +171,12 @@ impl UserDictionaryManager {
///
/// If no user dictionary was found, a new dictionary will be created at
/// the default path.
pub fn init(self) -> io::Result<Box<dyn Dictionary>> {
pub fn init(&self) -> io::Result<Box<dyn Dictionary>> {
let mut loader = SingleDictionaryLoader::new();
loader.migrate_sqlite(true);
let data_path = self
.data_path
.clone()
.or_else(userphrase_path)
.ok_or(io::Error::from(io::ErrorKind::NotFound))?;
if data_path.ends_with(UD_MEM_FILE_NAME) {
Expand Down Expand Up @@ -246,6 +250,25 @@ impl UserDictionaryManager {
fresh_dict.set_usage(DictionaryUsage::User);
Ok(fresh_dict)
}
/// Searches for and initializes the user exclusion dictionary.
///
/// The dictionary file is named `chewing-deleted.dat` and is looked up in
/// the parent directory of the user dictionary path. That directory is
/// created first if it does not exist yet.
///
/// If no user exclusion dictionary was found, a new dictionary
/// will be created at the default path.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::NotFound`] error if no user dictionary path
/// is configured and no default path is available, an
/// [`io::ErrorKind::InvalidInput`] error if that path has no parent
/// directory, and any error raised while creating the directory or
/// loading the dictionary.
pub fn init_deleted(&self) -> io::Result<Box<dyn Dictionary>> {
    let loader = SingleDictionaryLoader::new();
    let data_path = self
        .data_path
        .clone()
        .or_else(userphrase_path)
        .ok_or_else(|| io::Error::from(io::ErrorKind::NotFound))?;
    // A path such as "" or "/" has no parent. The path may be supplied by
    // the user, so report the problem instead of panicking.
    let userdata_dir = data_path.parent().ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            "user dictionary path should contain a filename",
        )
    })?;
    if !userdata_dir.exists() {
        info!("Creating userdata_dir: {}", userdata_dir.display());
        fs::create_dir_all(userdata_dir)?;
    }
    let exclude_dict_path = userdata_dir.join("chewing-deleted.dat");
    // NOTE(review): the usage is not set to `DictionaryUsage::ExcludeList`
    // here; confirm that the loader or the caller takes care of it.
    Ok(loader.guess_format_and_load(&exclude_dict_path)?)
}
/// Load an in-memory user dictionary.
pub fn in_memory() -> Box<dyn Dictionary> {
info!("Use in memory trie dictionary");
Expand Down
24 changes: 10 additions & 14 deletions src/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,22 @@ mod usage;
#[derive(Debug)]
pub struct UpdateDictionaryError {
/// Static description of what went wrong; it is included in the
/// `Display` output after the "update dictionary failed" prefix.
message: &'static str,
/// Optional underlying error that caused the update to fail.
source: Option<Box<dyn Error + Send + Sync>>,
}

impl UpdateDictionaryError {
pub(crate) fn new() -> UpdateDictionaryError {
UpdateDictionaryError { source: None }
}
}

impl From<io::Error> for UpdateDictionaryError {
fn from(value: io::Error) -> Self {
pub(crate) fn new(message: &'static str) -> UpdateDictionaryError {
UpdateDictionaryError {
source: Some(Box::new(value)),
message,
source: None,
}
}
}

impl Display for UpdateDictionaryError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "update dictionary failed")
write!(f, "update dictionary failed: {}", self.message)
}
}

Expand Down Expand Up @@ -358,14 +354,14 @@ pub trait Dictionary: Debug {
/// It should not fail if the dictionary is read-only or able to sync across
/// processes automatically.
fn reopen(&mut self) -> Result<(), UpdateDictionaryError> {
Err(UpdateDictionaryError { source: None })
Err(UpdateDictionaryError::new("unimplemented"))
}
/// Flushes all the changes back to the filesystem
///
/// The change made to the dictionary might not be persisted without
/// calling this method.
fn flush(&mut self) -> Result<(), UpdateDictionaryError> {
Err(UpdateDictionaryError { source: None })
Err(UpdateDictionaryError::new("unimplemented"))
}
/// A method for updating dictionaries.
///
Expand All @@ -390,7 +386,7 @@ pub trait Dictionary: Debug {
_syllables: &[Syllable],
_phrase: Phrase,
) -> Result<(), UpdateDictionaryError> {
Err(UpdateDictionaryError { source: None })
Err(UpdateDictionaryError::new("unimplemented"))
}
/// TODO: doc
fn update_phrase(
Expand All @@ -400,15 +396,15 @@ pub trait Dictionary: Debug {
_user_freq: u32,
_time: u64,
) -> Result<(), UpdateDictionaryError> {
Err(UpdateDictionaryError { source: None })
Err(UpdateDictionaryError::new("unimplemented"))
}
/// TODO: doc
fn remove_phrase(
&mut self,
_syllables: &[Syllable],
_phrase_str: &str,
) -> Result<(), UpdateDictionaryError> {
Err(UpdateDictionaryError { source: None })
Err(UpdateDictionaryError::new("unimplemented"))
}
}

Expand Down
Loading
Loading