From a110f54523d9f2c9427152021e1ef3b97479ac5c Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Fri, 30 Jan 2026 21:57:01 +0900 Subject: [PATCH] feat(dict): introduce the usage property --- .helix/languages.toml | 2 + NEWS | 10 +++ capi/src/io.rs | 4 +- src/dictionary/layered.rs | 111 +++++++++++++------------ src/dictionary/loader.rs | 134 ++++++++++++++++++++---------- src/dictionary/mod.rs | 10 ++- src/dictionary/sqlite.rs | 4 +- src/dictionary/trie.asn1 | 4 +- src/dictionary/trie.rs | 49 +++++++++-- src/dictionary/trie_buf.rs | 22 ++++- src/dictionary/usage.rs | 69 +++++++++++++++ src/editor/mod.rs | 108 +++++++++++++----------- src/path.rs | 32 +++---- src/zhuyin/syllable.rs | 12 ++- tests/data/golden-chewing.sqlite3 | Bin 20480 -> 20480 bytes tests/test-userphrase.c | 4 + tools/src/dump.rs | 2 + tools/src/flags.rs | 4 + tools/src/info.rs | 12 +-- tools/src/init_database.rs | 4 + 20 files changed, 411 insertions(+), 186 deletions(-) create mode 100644 .helix/languages.toml create mode 100644 src/dictionary/usage.rs diff --git a/.helix/languages.toml b/.helix/languages.toml new file mode 100644 index 000000000..12e819359 --- /dev/null +++ b/.helix/languages.toml @@ -0,0 +1,2 @@ +[language-server.rust-analyzer.config] +cargo.features = ["sqlite"] diff --git a/NEWS b/NEWS index a2d806c89..e52d7979d 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,17 @@ What's New in libchewing (unreleased) --------------------------------------------------------- +* Features + - dict: loading of the user dictionary is now also controlled by enabled_dicts + in `chewing_new3()`. + +* Bug Fixes + - dict: fixed parsing trie dictionary file with extension fields. + * Changes + - rust: breaking! renamed SystemDictionaryLoader to AssetLoader. + - rust: breaking! renamed UserDictionaryLoader to UserDictionaryManager. + - rust: Dictionary trait gained a new `set_usage()` method. - conversion: adjust max output paths down to 10.
What's New in libchewing 0.11.0 (January 10, 2026) diff --git a/capi/src/io.rs b/capi/src/io.rs index 520b2bfaf..96efd2580 100644 --- a/capi/src/io.rs +++ b/capi/src/io.rs @@ -134,7 +134,7 @@ pub unsafe extern "C" fn chewing_new2( chewing_new3( syspath, userpath, - c"word.dat,tsi.dat".as_ptr(), + c"word.dat,tsi.dat,chewing.dat".as_ptr(), logger, loggerdata, ) @@ -237,7 +237,7 @@ pub unsafe extern "C" fn chewing_new3( /// don't need to be freed. #[unsafe(no_mangle)] pub unsafe extern "C" fn chewing_get_defaultDictionaryNames() -> *const c_char { - c"word.dat,tsi.dat".as_ptr() + c"word.dat,tsi.dat,chewing.dat".as_ptr() } /// Releases the resources used by the given Chewing IM instance. diff --git a/src/dictionary/layered.rs b/src/dictionary/layered.rs index 82a0b619d..4dc4fa2ff 100644 --- a/src/dictionary/layered.rs +++ b/src/dictionary/layered.rs @@ -1,12 +1,12 @@ -use std::{ - collections::{BTreeMap, btree_map::Entry}, - iter, -}; +use std::collections::{BTreeMap, btree_map::Entry}; use log::error; use super::{Dictionary, DictionaryInfo, Entries, LookupStrategy, Phrase, UpdateDictionaryError}; -use crate::zhuyin::Syllable; +use crate::{ + dictionary::{DictionaryUsage, TrieBuf}, + zhuyin::Syllable, +}; /// A collection of dictionaries that returns the union of the lookup results. /// # Examples @@ -29,7 +29,7 @@ use crate::zhuyin::Syllable; /// vec![("策", 100), ("冊", 100)] /// )]); /// -/// let dict = Layered::new(vec![Box::new(sys_dict)], Box::new(user_dict)); +/// let dict = Layered::new(vec![Box::new(sys_dict), Box::new(user_dict)]); /// assert_eq!( /// [ /// ("側", 1, 0).into(), @@ -48,20 +48,31 @@ use crate::zhuyin::Syllable; /// ``` #[derive(Debug)] pub struct Layered { - sys_dict: Vec>, - user_dict: Box, + dicts: Vec>, + user_dict_index: usize, } impl Layered { /// Creates a new `Layered` with the list of dictionaries. 
- pub fn new(sys_dict: Vec>, user_dict: Box) -> Layered { + pub fn new(mut dicts: Vec>) -> Layered { + let user_dict_index = dicts.iter().enumerate().find_map(|d| { + if d.1.about().usage == DictionaryUsage::User { + Some(d.0) + } else { + None + } + }); + if user_dict_index.is_none() { + dicts.push(Box::new(TrieBuf::new_in_memory())); + } + let user_dict_index = user_dict_index.unwrap_or(dicts.len() - 1); Layered { - sys_dict, - user_dict, + dicts, + user_dict_index, } } pub fn user_dict(&mut self) -> &mut dyn Dictionary { - self.user_dict.as_mut() + self.dicts[self.user_dict_index].as_mut() } } @@ -89,31 +100,28 @@ impl Dictionary for Layered { let mut sort_map: BTreeMap = BTreeMap::new(); let mut phrases: Vec = Vec::new(); - self.sys_dict - .iter() - .chain(iter::once(&self.user_dict)) - .for_each(|d| { - for phrase in d.lookup(syllables, strategy) { - debug_assert!(!phrase.as_str().is_empty()); - match sort_map.entry(phrase.to_string()) { - Entry::Occupied(entry) => { - let index = *entry.get(); - phrases[index].freq += phrase.freq; - phrases[index].last_used = - match (phrases[index].last_used, phrase.last_used) { - (Some(orig), Some(new)) => Some(u64::max(orig, new)), - (Some(orig), None) => Some(orig), - (None, Some(new)) => Some(new), - (None, None) => None, - }; - } - Entry::Vacant(entry) => { - entry.insert(phrases.len()); - phrases.push(phrase); - } + self.dicts.iter().for_each(|d| { + for phrase in d.lookup(syllables, strategy) { + debug_assert!(!phrase.as_str().is_empty()); + match sort_map.entry(phrase.to_string()) { + Entry::Occupied(entry) => { + let index = *entry.get(); + phrases[index].freq += phrase.freq; + phrases[index].last_used = + match (phrases[index].last_used, phrase.last_used) { + (Some(orig), Some(new)) => Some(u64::max(orig, new)), + (Some(orig), None) => Some(orig), + (None, Some(new)) => Some(new), + (None, None) => None, + }; + } + Entry::Vacant(entry) => { + entry.insert(phrases.len()); + phrases.push(phrase); } } - }); + } + }); 
phrases } @@ -121,12 +129,7 @@ impl Dictionary for Layered { /// /// **NOTE**: Duplicate entries are not removed. fn entries(&self) -> Entries<'_> { - Box::new( - self.sys_dict - .iter() - .chain(iter::once(&self.user_dict)) - .flat_map(|dict| dict.entries()), - ) + Box::new(self.dicts.iter().flat_map(|dict| dict.entries())) } fn about(&self) -> DictionaryInfo { @@ -140,12 +143,14 @@ impl Dictionary for Layered { None } + fn set_usage(&mut self, _usage: DictionaryUsage) {} + fn reopen(&mut self) -> Result<(), UpdateDictionaryError> { - self.user_dict.reopen() + self.user_dict().reopen() } fn flush(&mut self) -> Result<(), UpdateDictionaryError> { - self.user_dict.flush() + self.user_dict().flush() } fn add_phrase( @@ -157,7 +162,7 @@ impl Dictionary for Layered { error!("BUG! added phrase is empty"); return Ok(()); } - self.user_dict.add_phrase(syllables, phrase) + self.user_dict().add_phrase(syllables, phrase) } fn update_phrase( @@ -171,7 +176,7 @@ impl Dictionary for Layered { error!("BUG! 
added phrase is empty"); return Ok(()); } - self.user_dict + self.user_dict() .update_phrase(syllables, phrase, user_freq, time) } @@ -180,7 +185,8 @@ impl Dictionary for Layered { syllables: &[Syllable], phrase_str: &str, ) -> Result<(), UpdateDictionaryError> { - self.user_dict.remove_phrase(syllables, phrase_str) + // TODO use exclude list + self.user_dict().remove_phrase(syllables, phrase_str) } } @@ -194,7 +200,8 @@ mod tests { use super::Layered; use crate::{ dictionary::{ - Dictionary, DictionaryBuilder, LookupStrategy, Phrase, Trie, TrieBuf, TrieBuilder, + Dictionary, DictionaryBuilder, DictionaryUsage, LookupStrategy, Phrase, Trie, TrieBuf, + TrieBuilder, }, syl, zhuyin::Bopomofo, @@ -211,7 +218,7 @@ mod tests { vec![("策", 100), ("冊", 100)], )]); - let dict = Layered::new(vec![Box::new(sys_dict)], Box::new(user_dict)); + let dict = Layered::new(vec![Box::new(sys_dict), Box::new(user_dict)]); assert_eq!( [ ( @@ -253,7 +260,7 @@ mod tests { vec![("策", 100), ("冊", 100)], )]); - let dict = Layered::new(vec![Box::new(sys_dict)], Box::new(user_dict)); + let dict = Layered::new(vec![Box::new(sys_dict), Box::new(user_dict)]); assert_eq!( Some(("側", 1, 0).into()), dict.lookup( @@ -287,6 +294,7 @@ mod tests { vec![("測", 1), ("冊", 1), ("側", 1)], )]); let mut builder = TrieBuilder::new(); + builder.set_usage(DictionaryUsage::User); builder.insert( &[syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]], ("策", 100, 0).into(), @@ -298,9 +306,10 @@ mod tests { let mut cursor = Cursor::new(vec![]); builder.write(&mut cursor)?; cursor.rewind()?; - let user_dict = Trie::new(&mut cursor)?; + let mut user_dict = Trie::new(&mut cursor)?; + user_dict.set_usage(DictionaryUsage::User); - let mut dict = Layered::new(vec![Box::new(sys_dict)], Box::new(user_dict)); + let mut dict = Layered::new(vec![Box::new(sys_dict), Box::new(user_dict)]); assert_eq!( Some(("側", 1, 0).into()), dict.lookup( diff --git a/src/dictionary/loader.rs b/src/dictionary/loader.rs index 6530213f8..161dfdffb 
100644 --- a/src/dictionary/loader.rs +++ b/src/dictionary/loader.rs @@ -13,8 +13,9 @@ use log::{error, info}; use super::SqliteDictionary; use super::{Dictionary, TrieBuf, uhash}; use crate::{ + dictionary::DictionaryUsage, editor::{AbbrevTable, SymbolSelector}, - path::{find_files_by_names, find_path_by_files, sys_path_from_env_var, userphrase_path}, + path::{find_files_by_names, find_path_by_files, search_path_from_env_var, userphrase_path}, }; const UD_UHASH_FILE_NAME: &str = "uhash.dat"; @@ -24,12 +25,12 @@ const UD_MEM_FILE_NAME: &str = ":memory:"; const ABBREV_FILE_NAME: &str = "swkb.dat"; const SYMBOLS_FILE_NAME: &str = "symbols.dat"; -pub const DEFAULT_DICT_NAMES: &[&str] = &["word.dat", "tsi.dat"]; +pub const DEFAULT_DICT_NAMES: &[&str] = &["word.dat", "tsi.dat", "chewing.dat"]; -/// Automatically searchs and loads system dictionaries. +/// Automatically searchs and loads dictionaries. #[derive(Debug, Default)] -pub struct SystemDictionaryLoader { - sys_path: Option, +pub struct AssetLoader { + search_path: Option, } /// Errors during loading system or user dictionaries. @@ -53,27 +54,29 @@ fn io_err(err: io::Error) -> LoadDictionaryError { LoadDictionaryError::IoError(err) } -impl SystemDictionaryLoader { - /// Creates a new system dictionary loader. - pub fn new() -> SystemDictionaryLoader { - SystemDictionaryLoader::default() +impl AssetLoader { + /// Creates a new dictionary loader. + pub fn new() -> AssetLoader { + AssetLoader::default() } - /// Override the default system dictionary search path. - pub fn sys_path(mut self, search_path: impl Into) -> SystemDictionaryLoader { - self.sys_path = Some(search_path.into()); + /// Override the default dictionary search path. + pub fn search_path(mut self, search_path: impl Into) -> AssetLoader { + self.search_path = Some(search_path.into()); self } /// Searches and loads the specified dictionaries. /// - /// Search path can be changed using [`sys_path`][SystemDictionaryLoader::sys_path]. 
- pub fn load(&self, names: &[T]) -> Result>, LoadDictionaryError> + /// Search path can be changed using [`search_path`][AssetLoader::search_path]. + /// + /// Any dictionary that is not in the search paths or cannot be loaded is skipped. + pub fn load(&self, names: &[T]) -> Vec> where T: AsRef, { - let search_path = if let Some(sys_path) = &self.sys_path { - sys_path.to_owned() + let search_path = if let Some(path) = &self.search_path { + path.to_owned() } else { - sys_path_from_env_var() + search_path_from_env_var() }; let loader = SingleDictionaryLoader::new(); let files = find_files_by_names(&search_path, names); @@ -82,67 +85,90 @@ impl SystemDictionaryLoader { for file in files.iter() { if let Some(file_name) = file.file_name() && target_name.as_ref() == file_name.to_string_lossy() - && let Ok(dict) = loader.guess_format_and_load(file) + && let Ok(mut dict) = loader.guess_format_and_load(file) { - info!("Load dictionary {}", file.display()); + match target_name.as_ref() { + "tsi.dat" | "word.dat" => { + dict.set_usage(DictionaryUsage::BuiltIn); + } + "chewing.dat" => { + dict.set_usage(DictionaryUsage::User); + } + _ => { + dict.set_usage(DictionaryUsage::Unknown); + } + } results.push(dict); continue 'next; } } error!("Dictionary file not found: {}", target_name.as_ref()); - return Err(LoadDictionaryError::NotFound); + continue; } - Ok(results) + results } /// Loads the abbrev table.
pub fn load_abbrev(&self) -> Result { - let search_path = if let Some(sys_path) = &self.sys_path { - sys_path.to_owned() + let search_path = if let Some(path) = &self.search_path { + path.to_owned() } else { - sys_path_from_env_var() + search_path_from_env_var() }; - let sys_path = find_path_by_files(&search_path, &[ABBREV_FILE_NAME]) + let parent_path = find_path_by_files(&search_path, &[ABBREV_FILE_NAME]) .ok_or(LoadDictionaryError::NotFound)?; - let abbrev_path = sys_path.join(ABBREV_FILE_NAME); + let abbrev_path = parent_path.join(ABBREV_FILE_NAME); info!("Loading {ABBREV_FILE_NAME}"); AbbrevTable::open(abbrev_path).map_err(io_err) } /// Loads the symbol table. pub fn load_symbol_selector(&self) -> Result { - let search_path = if let Some(sys_path) = &self.sys_path { - sys_path.to_owned() + let search_path = if let Some(path) = &self.search_path { + path.to_owned() } else { - sys_path_from_env_var() + search_path_from_env_var() }; - let sys_path = find_path_by_files(&search_path, &[SYMBOLS_FILE_NAME]) + let parent_path = find_path_by_files(&search_path, &[SYMBOLS_FILE_NAME]) .ok_or(LoadDictionaryError::NotFound)?; - let symbol_path = sys_path.join(SYMBOLS_FILE_NAME); + let symbol_path = parent_path.join(SYMBOLS_FILE_NAME); info!("Loading {SYMBOLS_FILE_NAME}"); SymbolSelector::open(symbol_path).map_err(io_err) } } -/// Automatically searches and loads the user dictionary. +/// Automatically searches and initializes the user dictionary. #[derive(Debug, Default)] -pub struct UserDictionaryLoader { +pub struct UserDictionaryManager { data_path: Option, } -impl UserDictionaryLoader { - /// Creates a user dictionary loader. - pub fn new() -> UserDictionaryLoader { - UserDictionaryLoader::default() +impl UserDictionaryManager { + /// Creates a user dictionary manager. + pub fn new() -> UserDictionaryManager { + UserDictionaryManager::default() } - /// Override the default user dictionary search path. 
- pub fn userphrase_path(mut self, path: impl AsRef) -> UserDictionaryLoader { + /// Override the default user dictionary path. + pub fn userphrase_path(mut self, path: impl AsRef) -> UserDictionaryManager { self.data_path = Some(path.as_ref().to_path_buf()); self } - /// Searches and loads the user dictionary. + /// Return the resolved file name of the user dictionary file. + pub fn file_name(&self) -> Option { + self.data_path + .clone() + .or_else(userphrase_path) + .and_then(|p| { + if p.is_file() { + p.file_name().map(|p| p.to_string_lossy().into_owned()) + } else { + None + } + }) + } + /// Searches and initializes the user dictionary. /// /// If no user dictionary were found, a new dictionary will be created at /// the default path. - pub fn load(self) -> io::Result> { + pub fn init(self) -> io::Result> { let mut loader = SingleDictionaryLoader::new(); loader.migrate_sqlite(true); let data_path = self @@ -153,24 +179,34 @@ impl UserDictionaryLoader { return Ok(Self::in_memory()); } if data_path.exists() { - info!("Loading {}", data_path.display()); - return loader.guess_format_and_load(&data_path); + info!("Use existing user dictionary {}", data_path.display()); + return loader.guess_format_and_load(&data_path).map(|mut dict| { + dict.set_usage(DictionaryUsage::User); + dict + }); } let userdata_dir = data_path.parent().expect("path should contain a filename"); if !userdata_dir.exists() { info!("Creating userdata_dir: {}", userdata_dir.display()); fs::create_dir_all(userdata_dir)?; } - info!("Loading {}", data_path.display()); + info!( + "Creating a fresh user dictionary at {}", + data_path.display() + ); let mut fresh_dict = loader.guess_format_and_load(&data_path)?; let user_dict_path = userdata_dir.join(UD_SQLITE_FILE_NAME); if cfg!(feature = "sqlite") && user_dict_path.exists() { #[cfg(feature = "sqlite")] { - let trie_dict = SqliteDictionary::open(user_dict_path) + info!( + "Importing existing sqlite dictionary at {}", + user_dict_path.display() + ); + 
let dict = SqliteDictionary::open(user_dict_path) .map_err(|e| io::Error::new(io::ErrorKind::Other, Box::new(e)))?; - for (syllables, phrase) in trie_dict.entries() { + for (syllables, phrase) in dict.entries() { let freq = phrase.freq(); let last_used = phrase.last_used().unwrap_or_default(); fresh_dict @@ -184,6 +220,10 @@ impl UserDictionaryLoader { } else { let uhash_path = userdata_dir.join(UD_UHASH_FILE_NAME); if uhash_path.exists() { + info!( + "Importing existing uhash dictionary at {}", + user_dict_path.display() + ); let mut input = File::open(uhash_path)?; if let Ok(phrases) = uhash::try_load_bin(&input).or_else(|_| { input.rewind()?; @@ -203,6 +243,7 @@ impl UserDictionaryLoader { } } + fresh_dict.set_usage(DictionaryUsage::User); Ok(fresh_dict) } /// Load a in-memory user dictionary. @@ -227,6 +268,7 @@ impl SingleDictionaryLoader { self.migrate_sqlite = migrate; } pub fn guess_format_and_load(&self, dict_path: &PathBuf) -> io::Result> { + info!("Loading dictionary {}", dict_path.display()); if self.migrate_sqlite && dict_path.is_file() { let metadata = dict_path.metadata()?; if metadata.permissions().readonly() { diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs index c64869521..be919cfcc 100644 --- a/src/dictionary/mod.rs +++ b/src/dictionary/mod.rs @@ -12,13 +12,14 @@ use std::{ pub use self::layered::Layered; pub use self::loader::{ - DEFAULT_DICT_NAMES, LoadDictionaryError, SingleDictionaryLoader, SystemDictionaryLoader, - UserDictionaryLoader, + AssetLoader, DEFAULT_DICT_NAMES, LoadDictionaryError, SingleDictionaryLoader, + UserDictionaryManager, }; #[cfg(feature = "sqlite")] pub use self::sqlite::{SqliteDictionary, SqliteDictionaryBuilder, SqliteDictionaryError}; pub use self::trie::{Trie, TrieBuilder, TrieOpenOptions, TrieStatistics}; pub use self::trie_buf::TrieBuf; +pub use self::usage::DictionaryUsage; use crate::zhuyin::Syllable; mod layered; @@ -28,6 +29,7 @@ mod sqlite; mod trie; mod trie_buf; mod uhash; +mod usage; /// The error 
type which is returned from updating a dictionary. #[derive(Debug)] @@ -100,6 +102,8 @@ pub struct DictionaryInfo { /// /// It's recommended to include the name and the version number. pub software: String, + /// The intended usage of the dictionary. + pub usage: DictionaryUsage, } /// A type containing a phrase string and its frequency. @@ -347,6 +351,8 @@ pub trait Dictionary: Debug { fn about(&self) -> DictionaryInfo; /// Returns the dictionary file path if it's backed by a file. fn path(&self) -> Option<&Path>; + /// Set the runtime usage of the dictionary + fn set_usage(&mut self, usage: DictionaryUsage); /// Reopens the dictionary if it was changed by a different process /// /// It should not fail if the dictionary is read-only or able to sync across diff --git a/src/dictionary/sqlite.rs b/src/dictionary/sqlite.rs index 7f2726868..59beb9e26 100644 --- a/src/dictionary/sqlite.rs +++ b/src/dictionary/sqlite.rs @@ -12,7 +12,7 @@ use super::{ BuildDictionaryError, Dictionary, DictionaryBuilder, DictionaryInfo, Entries, LookupStrategy, Phrase, UpdateDictionaryError, }; -use crate::zhuyin::Syllable; +use crate::{dictionary::DictionaryUsage, zhuyin::Syllable}; const APPLICATION_ID: u32 = 0x43484557; // 'CHEW' in big-endian const USER_VERSION: u32 = 0; @@ -406,6 +406,8 @@ impl Dictionary for SqliteDictionary { self.path.as_ref().map(|p| p as &Path) } + fn set_usage(&mut self, _usage: DictionaryUsage) {} + fn reopen(&mut self) -> Result<(), UpdateDictionaryError> { Ok(()) } diff --git a/src/dictionary/trie.asn1 b/src/dictionary/trie.asn1 index 000ed0283..efb353cea 100644 --- a/src/dictionary/trie.asn1 +++ b/src/dictionary/trie.asn1 @@ -19,7 +19,8 @@ BEGIN license UTF8String, version UTF8String, software UTF8String, - ... + ..., + usage [0] Usage OPTIONAL } Index ::= OCTET STRING Phrase ::= SEQUENCE @@ -30,5 +31,6 @@ BEGIN ... 
} Version ::= INTEGER { v1(0) } + Usage ::= INTEGER { unknown(0), built-in(1), extension(2), custom(3), user(4), exclude-list(5) } Uint64 ::= INTEGER (0..18446744073709551615) END diff --git a/src/dictionary/trie.rs b/src/dictionary/trie.rs index 2e1e9c138..0dcbc6d1e 100644 --- a/src/dictionary/trie.rs +++ b/src/dictionary/trie.rs @@ -23,7 +23,7 @@ use super::{ BuildDictionaryError, Dictionary, DictionaryBuilder, DictionaryInfo, Entries, LookupStrategy, Phrase, }; -use crate::zhuyin::Syllable; +use crate::{dictionary::DictionaryUsage, zhuyin::Syllable}; const DICT_FORMAT_VERSION: u8 = 0; @@ -419,26 +419,32 @@ impl Dictionary for Trie { fn path(&self) -> Option<&Path> { self.path.as_ref().map(|p| p as &Path) } + + fn set_usage(&mut self, usage: DictionaryUsage) { + self.info.usage = usage; + } } fn context_specific( tag_number: u8, + tag_mode: TagMode, value: &T, ) -> ContextSpecificRef<'_, T> { ContextSpecificRef { tag_number: TagNumber::new(tag_number), - tag_mode: TagMode::Implicit, + tag_mode, value, } } fn context_specific_opt( tag_number: u8, + tag_mode: TagMode, value: &Option, ) -> Option> { value .as_ref() - .map(|value| context_specific(tag_number, value)) + .map(|value| context_specific(tag_number, tag_mode, value)) } struct DictionaryInfoRef<'a> { @@ -447,6 +453,7 @@ struct DictionaryInfoRef<'a> { license: Utf8StringRef<'a>, version: Utf8StringRef<'a>, software: Utf8StringRef<'a>, + usage: DictionaryUsage, } impl From> for DictionaryInfo { @@ -457,6 +464,7 @@ impl From> for DictionaryInfo { license: value.license.into(), version: value.version.into(), software: value.software.into(), + usage: value.usage.into(), } } } @@ -469,6 +477,7 @@ impl DictionaryInfoRef<'_> { license: Utf8StringRef::new(&info.license).unwrap(), version: Utf8StringRef::new(&info.version).unwrap(), software: Utf8StringRef::new(&info.software).unwrap(), + usage: info.usage, } } } @@ -485,12 +494,19 @@ impl<'a> DecodeValue<'a> for DictionaryInfoRef<'a> { let license = 
reader.decode()?; let version = reader.decode()?; let software = reader.decode()?; + let raw_usage = reader + .context_specific(TagNumber::N0, TagMode::Explicit)? + .unwrap_or(0); + let usage = DictionaryUsage::from(raw_usage); + // consume the remaining unknown data + let _ = reader.read_slice(reader.remaining_len()); Ok(DictionaryInfoRef { name, copyright, license, version, software, + usage, }) }) } @@ -503,6 +519,9 @@ impl EncodeValue for DictionaryInfoRef<'_> { + self.license.encoded_len()? + self.version.encoded_len()? + self.software.encoded_len()? + // TODO - enable this will break chewing <= 0.11.0 because old + // parser did not handle extension marker properly + // + context_specific(0, TagMode::Explicit, &(self.usage as u8)).encoded_len()? } fn encode_value(&self, encoder: &mut impl Writer) -> der::Result<()> { @@ -511,6 +530,9 @@ impl EncodeValue for DictionaryInfoRef<'_> { self.license.encode(encoder)?; self.version.encode(encoder)?; self.software.encode(encoder)?; + // TODO - enable this will break chewing <= 0.11.0 because old + // parser did not handle extension marker properly + // context_specific(0, TagMode::Explicit, &(self.usage as u8)).encode(encoder)?; Ok(()) } } @@ -538,6 +560,8 @@ impl<'a> DecodeValue<'a> for TrieFileRef<'a> { let info = reader.decode()?; let index = reader.decode()?; let phrase_seq = reader.decode()?; + // consume the remaining unknown data + let _ = reader.read_slice(reader.remaining_len()); Ok(Self { info, index, @@ -576,6 +600,8 @@ impl<'a> DecodeValue<'a> for Phrase { let phrase: Utf8StringRef<'_> = reader.decode()?; let freq = reader.decode()?; let last_used = reader.context_specific(TagNumber::N0, TagMode::Implicit)?; + // consume the remaining unknown data + let _ = reader.read_slice(reader.remaining_len()); Ok(Phrase { text: String::from(phrase).into_boxed_str(), freq, @@ -589,13 +615,13 @@ impl EncodeValue for Phrase { fn value_len(&self) -> der::Result { Utf8StringRef::new(self.as_str())?.encoded_len()? 
+ self.freq.encoded_len()? - + context_specific_opt(0, &self.last_used).encoded_len()? + + context_specific_opt(0, TagMode::Implicit, &self.last_used).encoded_len()? } fn encode_value(&self, encoder: &mut impl Writer) -> der::Result<()> { Utf8StringRef::new(self.as_ref())?.encode(encoder)?; self.freq.encode(encoder)?; - context_specific_opt(0, &self.last_used).encode(encoder)?; + context_specific_opt(0, TagMode::Implicit, &self.last_used).encode(encoder)?; Ok(()) } } @@ -890,6 +916,11 @@ impl TrieBuilder { node_id } + /// Set the intended usage of this trie dictionary. + pub fn set_usage(&mut self, usage: DictionaryUsage) { + self.info.usage = usage; + } + /// Writes the dictionary to an output stream and returns the number of /// bytes written. /// @@ -986,8 +1017,9 @@ impl TrieBuilder { } } + let info = DictionaryInfoRef::new(&self.info); let trie_dict_ref = TrieFileRef { - info: DictionaryInfoRef::new(&self.info), + info, index: OctetStringRef::new(&dict_buf).map_err(io_error)?, phrase_seq: PhraseSeqRef { der_bytes: &data_buf.buf, @@ -1214,8 +1246,8 @@ mod tests { use super::{Trie, TrieBuilder}; use crate::{ dictionary::{ - Dictionary, DictionaryBuilder, DictionaryInfo, LookupStrategy, Phrase, TrieOpenOptions, - trie::TrieBuilderNode, + Dictionary, DictionaryBuilder, DictionaryInfo, DictionaryUsage, LookupStrategy, Phrase, + TrieOpenOptions, trie::TrieBuilderNode, }, syl, zhuyin::Bopomofo, @@ -1615,6 +1647,7 @@ mod tests { license: "license".into(), version: "version".into(), software: "software".into(), + usage: DictionaryUsage::BuiltIn, }; builder.set_info(info)?; diff --git a/src/dictionary/trie_buf.rs b/src/dictionary/trie_buf.rs index 7d004a9fe..2a9c4f0e7 100644 --- a/src/dictionary/trie_buf.rs +++ b/src/dictionary/trie_buf.rs @@ -7,11 +7,11 @@ use std::{ thread::{self, JoinHandle}, }; -use log::{error, info}; +use log::{debug, error, info}; use super::{ - BuildDictionaryError, Dictionary, DictionaryBuilder, DictionaryInfo, Entries, LookupStrategy, - 
Phrase, Trie, TrieBuilder, UpdateDictionaryError, + BuildDictionaryError, Dictionary, DictionaryBuilder, DictionaryInfo, DictionaryUsage, Entries, + LookupStrategy, Phrase, Trie, TrieBuilder, UpdateDictionaryError, }; use crate::zhuyin::Syllable; @@ -45,6 +45,7 @@ impl TrieBuf { license: "Unknown".to_string(), version: "0.0.0".to_string(), software: software_version(), + usage: DictionaryUsage::Unknown, }; let mut builder = TrieBuilder::new(); builder @@ -130,6 +131,11 @@ impl TrieBuf { let mut sort_map = BTreeMap::new(); let mut phrases: Vec = Vec::new(); + debug!( + "lookup {syllables:?} result: {:?}", + self.entries_iter_for(syllables, strategy) + .collect::>() + ); for phrase in self.entries_iter_for(syllables, strategy) { match sort_map.entry(phrase.to_string()) { Entry::Occupied(entry) => { @@ -161,6 +167,7 @@ impl TrieBuf { return Err(UpdateDictionaryError { source: None }); } + debug!("added phrase {} {syllables:?}", phrase.text); self.btree.insert( ( Cow::from(syllables.to_vec()), @@ -180,6 +187,7 @@ impl TrieBuf { user_freq: u32, time: u64, ) -> Result<(), UpdateDictionaryError> { + debug!("updated phrase {} {syllables:?}", phrase.text); self.btree.insert( ( Cow::from(syllables.to_vec()), @@ -188,6 +196,7 @@ impl TrieBuf { (user_freq, time), ); self.dirty = true; + debug!("{:?}", self.btree); Ok(()) } @@ -204,6 +213,7 @@ impl TrieBuf { .insert((syllables_key, phrase_str.to_owned().into())); self.dirty = true; + debug!("removed phrase {phrase_str} {syllables:?}"); Ok(()) } @@ -304,6 +314,12 @@ impl Dictionary for TrieBuf { self.trie.as_ref()?.path() } + fn set_usage(&mut self, usage: DictionaryUsage) { + if let Some(trie) = self.trie.as_mut() { + trie.set_usage(usage); + } + } + fn reopen(&mut self) -> Result<(), UpdateDictionaryError> { self.sync()?; Ok(()) diff --git a/src/dictionary/usage.rs b/src/dictionary/usage.rs new file mode 100644 index 000000000..092c53ef4 --- /dev/null +++ b/src/dictionary/usage.rs @@ -0,0 +1,69 @@ +use 
std::{convert::Infallible, fmt::Display, str::FromStr}; + +/// The intended usage of the dictionary. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +#[non_exhaustive] +pub enum DictionaryUsage { + /// Default value + #[default] + Unknown = 0, + /// A built-in dictionary + BuiltIn = 1, + /// A contributed extension + Extension = 2, + /// A local custom dictionary + Custom = 3, + /// A user dictionary + /// + /// This usage type may be used to identify whether a dictionary + /// should be opened as read/write and as the user dictionary by the + /// Layered virtual dictionary. + User = 4, + /// An exclusion user dictionary + /// + /// This usage type may be used to identify whether a dictionary + /// should be opened as read/write and as the exclusion dictionary + /// by the Layered virtual dictionary. + ExcludeList = 5, +} + +impl From for DictionaryUsage { + fn from(value: u8) -> Self { + match value { + 1 => DictionaryUsage::BuiltIn, + 2 => DictionaryUsage::Extension, + 3 => DictionaryUsage::Custom, + 4 => DictionaryUsage::User, + 5 => DictionaryUsage::ExcludeList, + _ => DictionaryUsage::Unknown, + } + } +} + +impl Display for DictionaryUsage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DictionaryUsage::Unknown => f.write_str("unknown"), + DictionaryUsage::BuiltIn => f.write_str("built-in"), + DictionaryUsage::Extension => f.write_str("extension"), + DictionaryUsage::Custom => f.write_str("custom"), + DictionaryUsage::User => f.write_str("user"), + DictionaryUsage::ExcludeList => f.write_str("exclude-list"), + } + } +} + +impl FromStr for DictionaryUsage { + type Err = Infallible; + fn from_str(s: &str) -> Result { + Ok(match s { + "unknown" => DictionaryUsage::Unknown, + "built-in" => DictionaryUsage::BuiltIn, + "extension" => DictionaryUsage::Extension, + "custom" => DictionaryUsage::Custom, + "user" => DictionaryUsage::User, + "exclude-list" => DictionaryUsage::ExcludeList, + _ => DictionaryUsage::Unknown, + }) +
} +} diff --git a/src/editor/mod.rs b/src/editor/mod.rs index d5429e795..67cdb54fc 100644 --- a/src/editor/mod.rs +++ b/src/editor/mod.rs @@ -23,8 +23,8 @@ use crate::{ special_symbol_input, }, dictionary::{ - Dictionary, Layered, LookupStrategy, SystemDictionaryLoader, Trie, UpdateDictionaryError, - UserDictionaryLoader, + AssetLoader, Dictionary, DictionaryUsage, Layered, LookupStrategy, Trie, + UpdateDictionaryError, UserDictionaryManager, }, input::{KeyState, KeyboardEvent, keysym::*}, zhuyin::Syllable, @@ -189,29 +189,60 @@ pub(crate) struct SharedState { impl Editor { pub fn chewing( - syspath: Option, + search_path: Option, userpath: Option, enabled_dicts: &[T], ) -> Editor where T: AsRef, { - let mut sys_loader = SystemDictionaryLoader::new(); - if let Some(syspath) = syspath { - sys_loader = sys_loader.sys_path(syspath); - } - let system_dicts = match sys_loader.load(enabled_dicts) { - Ok(d) => d, - Err(e) => { - let builtin = Trie::new(&include_bytes!("data/mini.dat")[..]); - error!("Failed to load system dict: {e}"); - error!("Loading builtin minimum dictionary..."); - // NB: we can unwrap because the built-in dictionary should always - // be valid. - vec![Box::new(builtin.unwrap()) as Box] + let mut enabled_dicts: Vec = enabled_dicts + .iter() + .map(|it| it.as_ref().to_owned()) + .collect(); + let mut user_dict_mgr = UserDictionaryManager::new(); + let user_dict = { + let mut custom_userpath = false; + if let Some(userpath) = userpath { + custom_userpath = true; + user_dict_mgr = user_dict_mgr.userphrase_path(userpath); } + if custom_userpath && let Some(file_name) = user_dict_mgr.file_name() { + // If we load user dictionary from passed in path then we should not load it again. 
+ if let Some(index) = enabled_dicts.iter().position(|d| d == &file_name) { + enabled_dicts.remove(index); + } + } + let user_dict = user_dict_mgr + .init() + .inspect_err(|error| { + error!("Failed to load user dict: {error}"); + }) + .ok(); + if custom_userpath { user_dict } else { None } }; - let abbrev = sys_loader.load_abbrev(); + let mut loader = AssetLoader::new(); + if let Some(syspath) = search_path { + loader = loader.search_path(syspath); + } + let mut dicts = loader.load(&enabled_dicts); + if let Some(user_dict) = user_dict { + dicts.push(user_dict); + } + if !dicts.iter().any(|dict| { + matches!( + dict.about().usage, + DictionaryUsage::BuiltIn | DictionaryUsage::Extension | DictionaryUsage::Custom + ) + }) { + let builtin = Trie::new(&include_bytes!("data/mini.dat")[..]); + error!("Failed to load any system dictionaries"); + error!("Loading builtin mini dictionary..."); + // SAFETY: we can unwrap because the built-in dictionary should always be valid. + dicts.insert(0, Box::new(builtin.unwrap())); + } + + let abbrev = loader.load_abbrev(); let abbrev = match abbrev { Ok(abbr) => abbr, Err(e) => { @@ -220,7 +251,7 @@ impl Editor { AbbrevTable::new() } }; - let sym_sel = sys_loader.load_symbol_selector(); + let sym_sel = loader.load_symbol_selector(); let sym_sel = match sym_sel { Ok(sym_sel) => sym_sel, Err(e) => { @@ -230,19 +261,8 @@ impl Editor { SymbolSelector::new(b"".as_slice()).unwrap() } }; - let mut user_dict_loader = UserDictionaryLoader::new(); - if let Some(userpath) = userpath { - user_dict_loader = user_dict_loader.userphrase_path(userpath); - } - let user_dictionary = match user_dict_loader.load() { - Ok(d) => d, - Err(e) => { - error!("Failed to load user dict: {e}"); - UserDictionaryLoader::in_memory() - } - }; - let estimate = LaxUserFreqEstimate::max_from(user_dictionary.as_ref()); - let dict = Layered::new(system_dicts, user_dictionary); + let mut dict = Layered::new(dicts); + let estimate = 
LaxUserFreqEstimate::max_from(dict.user_dict()); let conversion_engine = Box::new(ChewingEngine::new()); let editor = Editor::new(conversion_engine, dict, estimate, abbrev, sym_sel); editor @@ -1619,10 +1639,7 @@ mod tests { #[test] fn editing_mode_input_bopomofo() { - let dict = Layered::new( - vec![Box::new(TrieBuf::new_in_memory())], - Box::new(TrieBuf::new_in_memory()), - ); + let dict = Layered::new(vec![Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1656,7 +1673,7 @@ mod tests { vec![crate::syl![bpmf::C, bpmf::E, bpmf::TONE4]], vec![("冊", 100)], )]); - let dict = Layered::new(vec![Box::new(dict)], Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1688,7 +1705,7 @@ mod tests { vec![crate::syl![bpmf::C, bpmf::E, bpmf::TONE4]], vec![("冊", 100), ("測", 200)], )]); - let dict = Layered::new(vec![Box::new(dict)], Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1733,7 +1750,7 @@ mod tests { vec![crate::syl![bpmf::C, bpmf::E, bpmf::TONE4]], vec![("冊", 100), ("測", 200)], )]); - let dict = Layered::new(vec![Box::new(dict)], Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1778,7 +1795,7 @@ mod tests { vec![crate::syl![bpmf::C, bpmf::E, bpmf::TONE4]], vec![("冊", 100)], )]); - let dict = Layered::new(vec![Box::new(dict)], 
Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1819,7 +1836,7 @@ mod tests { vec![crate::syl![bpmf::C, bpmf::E, bpmf::TONE4]], vec![("冊", 100)], )]); - let dict = Layered::new(vec![Box::new(dict)], Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1873,7 +1890,7 @@ mod tests { #[test] fn editing_mode_input_switch_mode_behavior() { let dict = TrieBuf::new_in_memory(); - let dict = Layered::new(vec![Box::new(dict)], Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1896,7 +1913,7 @@ mod tests { vec![crate::syl![bpmf::C, bpmf::E, bpmf::TONE4]], vec![("冊", 100)], )]); - let dict = Layered::new(vec![Box::new(dict)], Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1933,7 +1950,7 @@ mod tests { #[test] fn editing_mode_input_full_shape_symbol() { let dict = TrieBuf::new_in_memory(); - let dict = Layered::new(vec![Box::new(dict)], Box::new(TrieBuf::new_in_memory())); + let dict = Layered::new(vec![Box::new(dict), Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); @@ -1972,10 +1989,7 @@ mod tests { #[test] fn 
editing_mode_open_empty_symbol_table_then_bell() { - let dict = Layered::new( - vec![Box::new(TrieBuf::new_in_memory())], - Box::new(TrieBuf::new_in_memory()), - ); + let dict = Layered::new(vec![Box::new(TrieBuf::new_in_memory())]); let conversion_engine = Box::new(ChewingEngine::new()); let estimate = LaxUserFreqEstimate::new(0); let abbrev = AbbrevTable::new(); diff --git a/src/path.rs b/src/path.rs index 9932460de..2099883a8 100644 --- a/src/path.rs +++ b/src/path.rs @@ -34,29 +34,29 @@ fn file_exists(path: &Path) -> bool { } } -pub fn sys_path_from_env_var() -> String { +pub fn search_path_from_env_var() -> String { + let mut paths = vec![]; + if let Some(user_datadir) = data_dir() { + paths.push( + user_datadir + .join(DICT_FOLDER) + .to_string_lossy() + .into_owned(), + ); + paths.push(user_datadir.to_string_lossy().into_owned()); + } let chewing_path = env::var("CHEWING_PATH"); if let Ok(chewing_path) = chewing_path { - info!("Using syspath from env CHEWING_PATH: {}", chewing_path); - chewing_path + info!("Add path from CHEWING_PATH: {}", chewing_path); + paths.push(chewing_path); } else { - let mut paths = vec![]; - if let Some(user_datadir) = data_dir() { - paths.push( - user_datadir - .join(DICT_FOLDER) - .to_string_lossy() - .into_owned(), - ); - paths.push(user_datadir.to_string_lossy().into_owned()); - } let sys_datadir = PathBuf::from(SYS_PATH.unwrap_or(DEFAULT_SYS_PATH)); paths.push(sys_datadir.join(DICT_FOLDER).to_string_lossy().into_owned()); paths.push(sys_datadir.to_string_lossy().into_owned()); - let chewing_path = paths.join(&SEARCH_PATH_SEP.to_string()); - info!("Using default syspath: {}", chewing_path); - chewing_path } + let chewing_path = paths.join(&SEARCH_PATH_SEP.to_string()); + info!("Using search path: {}", chewing_path); + chewing_path } pub(crate) fn find_path_by_files(search_path: &str, files: &[&str]) -> Option { diff --git a/src/zhuyin/syllable.rs b/src/zhuyin/syllable.rs index 7077bd189..1e08ecd9c 100644 --- 
a/src/zhuyin/syllable.rs +++ b/src/zhuyin/syllable.rs @@ -19,10 +19,14 @@ pub struct Syllable { impl Debug for Syllable { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Syllable") - .field("value", &self.value) - .field("to_string()", &self.to_string()) - .finish() + if f.alternate() { + f.debug_struct("Syllable") + .field("value", &self.value) + .field("to_string()", &self.to_string()) + .finish() + } else { + f.write_str(&self.to_string()) + } } } diff --git a/tests/data/golden-chewing.sqlite3 b/tests/data/golden-chewing.sqlite3 index f4a866265ad27f09f5700d71623d368228c7601c..f717a80695dd296b9ecba23a7e082409ca27cdc1 100644 GIT binary patch delta 31 ncmZozz}T>Wae_1>^F$d97H1FF@QIG%tjr90T|ygE=J*2ui+%{L delta 26 icmZozz}T>Wae_1>(?l7KiJj7{ObmKuof}i;_yYiI;0P}O diff --git a/tests/test-userphrase.c b/tests/test-userphrase.c index c35f57602..aa0a2c57f 100644 --- a/tests/test-userphrase.c +++ b/tests/test-userphrase.c @@ -894,10 +894,14 @@ void test_userphrase_remove() ok(ret == 1, "chewing_userphrase_add() return value `%d' shall be `%d'", ret, 1); ret = chewing_userphrase_remove(ctx, p1, b1); ok(ret == 1, "chewing_userphrase_remove() return value `%d' shall be `%d'", ret, 1); + ret = chewing_userphrase_lookup(ctx, p2, b1); + ok(ret == 1, "chewing_userphrase_lookup() return value `%d' shall be `%d'", ret, 1); chewing_delete(ctx); ctx = NULL; ctx = chewing_new(); + ret = chewing_userphrase_lookup(ctx, p2, b1); + ok(ret == 1, "chewing_userphrase_lookup() return value `%d' shall be `%d'", ret, 1); ret = chewing_userphrase_remove(ctx, p2, b1); ok(ret == 1, "chewing_userphrase_remove() return value `%d' shall be `%d'", ret, 1); chewing_delete(ctx); diff --git a/tools/src/dump.rs b/tools/src/dump.rs index b643b68da..1a6e06e8f 100644 --- a/tools/src/dump.rs +++ b/tools/src/dump.rs @@ -54,6 +54,7 @@ fn dump_dict_tsi_src(mut sink: BufWriter>, dict: &dyn Dictionary) writeln!(sink, "# dc:rights {}", info.copyright)?; writeln!(sink, "# 
dc:license {}", info.license)?; writeln!(sink, "# dc:identifier {}", info.version)?; + writeln!(sink, "# dc:type {}", info.usage)?; writeln!(sink, "# 詞(phrase) 詞頻(freq) 注音(bopomofo)")?; for (syllables, phrase) in dict.entries() { writeln!( @@ -77,6 +78,7 @@ fn dump_dict_csv(mut sink: BufWriter>, dict: &dyn Dictionary) -> writeln!(sink, "# dc:rights,{},", info.copyright)?; writeln!(sink, "# dc:license,{},", info.license)?; writeln!(sink, "# dc:identifier,{},", info.version)?; + writeln!(sink, "# dc:type,{},", info.usage)?; writeln!(sink, "# 詞(phrase),詞頻(freq),注音(bopomofo)")?; for (syllables, phrase) in dict.entries() { writeln!( diff --git a/tools/src/flags.rs b/tools/src/flags.rs index 00c43642b..36ea82e54 100644 --- a/tools/src/flags.rs +++ b/tools/src/flags.rs @@ -1,5 +1,6 @@ use std::path::PathBuf; +use chewing::dictionary::DictionaryUsage; use clap::{Args, Parser, Subcommand, ValueEnum}; #[derive(Parser)] @@ -37,6 +38,9 @@ pub(crate) struct InitDatabase { /// Version of the dictionary #[arg(short('r'), long, default_value = "1.0.0")] pub(crate) version: String, + /// The usage type of the dictionary + #[arg(short, long, default_value = "unknown")] + pub(crate) usage: DictionaryUsage, /// Skip invalid lines #[arg(short, long)] pub(crate) skip_invalid: bool, diff --git a/tools/src/info.rs b/tools/src/info.rs index 53bd96863..9d8a0525b 100644 --- a/tools/src/info.rs +++ b/tools/src/info.rs @@ -1,7 +1,7 @@ use anyhow::Result; use chewing::{ - dictionary::{Dictionary, SingleDictionaryLoader, UserDictionaryLoader}, - path::{find_files_by_ext, sys_path_from_env_var}, + dictionary::{Dictionary, SingleDictionaryLoader, UserDictionaryManager}, + path::{find_files_by_ext, search_path_from_env_var}, }; use crate::flags; @@ -10,7 +10,7 @@ pub(crate) fn run(args: flags::Info) -> Result<()> { if args.system { // FIXME: use find_files_by_ext and generic loader let loader = SingleDictionaryLoader::new(); - let search_path = sys_path_from_env_var(); + let search_path = 
search_path_from_env_var(); let files = find_files_by_ext(&search_path, &["dat", "sqlite3"]); let dictionaries: Vec<_> = files .iter() @@ -24,7 +24,7 @@ pub(crate) fn run(args: flags::Info) -> Result<()> { } } if args.user { - let dict = UserDictionaryLoader::new().load()?; + let dict = UserDictionaryManager::new().init()?; if args.json { print_json_info(&[dict], "user"); } else { @@ -32,7 +32,7 @@ pub(crate) fn run(args: flags::Info) -> Result<()> { } } if let Some(path) = args.path { - let dict = UserDictionaryLoader::new().userphrase_path(path).load()?; + let dict = SingleDictionaryLoader::new().guess_format_and_load(&path)?; if args.json { print_json_info(&[dict], "input"); } else { @@ -76,6 +76,7 @@ fn print_json_info(dictionaries: &[Box], from: &str) { println!(r#" "copyright": "{}","#, escape_json(info.copyright)); println!(r#" "license": "{}","#, escape_json(info.license)); println!(r#" "software": "{}""#, escape_json(info.software)); + println!(r#" "usage": "{}""#, escape_json(info.usage.to_string())); println!(" }}{}", if iter.peek().is_some() { "," } else { "" }); } println!("]"); @@ -96,5 +97,6 @@ fn print_info(dictionaries: &[Box], from: &str) { println!("Copyright : {}", info.copyright); println!("License : {}", info.license); println!("Software : {}", info.software); + println!("Usage : {}", info.usage); } } diff --git a/tools/src/init_database.rs b/tools/src/init_database.rs index 45fc71a28..92b758f10 100644 --- a/tools/src/init_database.rs +++ b/tools/src/init_database.rs @@ -82,6 +82,7 @@ pub(crate) fn run(args: flags::InitDatabase) -> Result<()> { let mut copyright = args.copyright; let mut license = args.license; let mut version = args.version; + let mut usage = args.usage; let software = format!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); let tsi = File::open(args.tsi_src)?; @@ -111,6 +112,7 @@ pub(crate) fn run(args: flags::InitDatabase) -> Result<()> { "dc:rights" => copyright = value, "dc:license" => license = value, 
"dc:identifier" => version = value, + "dc:type" => usage = value.parse().unwrap(), _ => (), } continue; @@ -152,6 +154,7 @@ pub(crate) fn run(args: flags::InitDatabase) -> Result<()> { license, version, software, + usage, }; builder.set_info(info.clone())?; @@ -164,6 +167,7 @@ pub(crate) fn run(args: flags::InitDatabase) -> Result<()> { eprintln!("Copyright : {}", info.copyright); eprintln!("License : {}", info.license); eprintln!("Version : {}", info.version); + eprintln!("Usage : {}", info.usage); eprintln!("Node count : {}", stats.node_count); eprintln!("Leaf count : {}", stats.leaf_count); eprintln!("Phrase count : {}", stats.phrase_count);