From d83abfb0e101939d1a10517a0a07aee4c3d212b7 Mon Sep 17 00:00:00 2001 From: James Haver Date: Thu, 8 Mar 2018 14:55:36 +0800 Subject: [PATCH 01/16] More tables for Okinawan --- src/okinawa.rs | 24 ++++++ src/util.rs | 208 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 232 insertions(+) diff --git a/src/okinawa.rs b/src/okinawa.rs index a972e59..f3d6c82 100644 --- a/src/okinawa.rs +++ b/src/okinawa.rs @@ -1502,4 +1502,28 @@ ki chi Okinawa->Uchinaa tsu chi tsume->chimi, tsukuru->chikuin ari, ori, uri ai, ui, ui hari->haai, tori->tui, Shuri->Sui awa aa Okinawa->Uchinaa + + +サフー +さふー 作法 さほう + +さらゆん さらいん 習う。学ぶ。教えを受ける。 + +しーじゃ 年上(の者)。年長(者) + +うっとぅ +ウットゥ uQtu(年下、弟妹)の対 + +ウヤマユン +うやむゆん +うやむいん 敬う。あがめる。 + +ちち (天体の) 月 + +わ行 ら行 や行 ま行 は行 な行 た行 さ行 か行 あ行 +あ段 +い段 +う段 +え段 +お段 */ diff --git a/src/util.rs b/src/util.rs index fdf2cac..5ea4a86 100644 --- a/src/util.rs +++ b/src/util.rs @@ -12,6 +12,17 @@ pub fn lookup(s: &str, table: &'static [(&str,&str)]) -> Option { } } +pub fn lookups(s: &str,x: &str, table: &'static [(&str, &'static [(&str,&str)])]) -> Option { + match table.binary_search_by(|&(key, _)| { + if s == key { Equal } + else if key < s { Less } + else { Greater } + }) { + Ok(i) => lookup(x, table[i].1), + Err(_) => None, + } +} + pub fn is_consonant(s: &str) -> bool { s.len() == 1 && !["a","e","i","n","o","u","y"].contains(&s) } @@ -36,4 +47,201 @@ pub const HIRAGANA_TO_GEMINATE_TABLE: &'static [(&str, &str)] = &[ ("ぁ",""),("あ",""),("ぃ",""),("い",""),("ぅ",""),("う",""),("ぇ",""),("え",""),("ぉ",""),("お",""),("か","k"),("が","g"),("き","k"),("ぎ","g"),("く","k"),("ぐ","g"),("け","k"),("げ","g"),("こ","k"),("ご","g"),("さ","s"),("ざ","z"),("し","s"),("じ","j"),("す","s"),("ず","z"),("せ","s"),("ぜ","z"),("そ","s"),("ぞ","z"),("た","t"),("だ","d"),("ち","t"),("ぢ","d"),("つ","t"),("づ","d"),("て","t"),("で","d"),("と","t"),("ど","d"),("な","n"),("に","n"),("ぬ","n"),("ね","n"),("の","n"),("は","h"),("ば","b"),("ぱ","p"),("ひ","h"),("び","b"),("ぴ","p"),("ふ","f"),("ぶ","b"),("ぷ","p"),("へ","h"),("べ","b"),("ぺ","b"),("ほ","h"),("ぼ","b"),("ぽ","p"),("ま","m"),("み","m"),("む","m"),("め","m"),("も","m"),("ゃ","y"),("や","y"),("ゅ","y"),("ゆ","y"),("ょ","y"),("よ","y"),("ら","r"),("り","r"),("る","r"),("れ","r"),("ろ","r"),("わ","w"),("ゐ","w"),("ゑ","w"),("を","w"),("ゔ","v"),("ゕ","k"),("ゖ","k") ]; + +pub const HIRAGANA_TABLE: &'static [(&str, &'static [(&str, &str)])] = &[ + ("あ",&[("あ","あ"),("い","い"),("う","う"),("え","え"),("お","お")]), + ("か",&[("あ","か"),("い","き"),("う","く"),("え","け"),("お","こ")]), + ("さ",&[("あ","さ"),("い","し"),("う","す"),("え","せ"),("お","そ")]), + ("た",&[("あ","た"),("い","ち"),("う","つ"),("え","て"),("お","と")]), + ("な",&[("あ","な"),("い","に"),("う","ぬ"),("え","ね"),("お","の")]), + ("は",&[("あ","は"),("い","ひ"),("う","ふ"),("え","へ"),("お","ほ")]), + ("ま",&[("あ","ま"),("い","み"),("う","む"),("え","め"),("お","も")]), + ("や",&[("あ","や"),("い","いぃ"),("う","ゆ"),("え","いぇ"),("お","よ")]), + ("ら",&[("あ","ら"),("い","り"),("う","る"),("え","れ"),("お","ろ")]), + ("わ",&[("あ","わ"),("い","ゐ"),("う","う"),("え","ゑ"),("お","を")]) +]; + +pub const A_ROW: &'static [(&str, &str)] = &[ + ("あ","あ"),("い","い"),("う","う"),("え","え"),("お","お") +]; + +pub const YA_ROW: &'static [(&str, &str)] = &[ + ("あ","や"),("い","いぃ"),("う","ゆ"),("え","いぇ"),("お","よ") +]; + +pub const WA_ROW: &'static [(&str, &str)] = &[ + ("あ","わ"),("い","ゐ"),("う","をぅ"),("え","ゑ"),("お","を") +]; + +// qyi and qye do not exist +pub const QYA_ROW: &'static [(&str, &str)] = &[ + ("あ","っや"),("う","っゆ"),("お","っよ") +]; + +// qwu does not exist +pub const QWA_ROW: &'static [(&str, &str)] = &[ + ("あ","っわ"),("い","っゐ"),("え","っゑ"),("お","っを") +]; + +pub const QA_ROW: &'static [(&str, &str)] = &[ + ("あ","っあ"),("い","っい"),("う","っう"),("え","っえ"),("お","っお") +]; + +pub const KA_ROW: &'static [(&str, &str)] = &[ + ("あ","か"),("い","き"),("う","く"),("え","け"),("お","こ") +]; + +// kye and kyi do not exits +pub const KYA_ROW: &'static [(&str, &str)] = &[ + ("あ","きゃ"),("う","きゅ"),("お","きょ") +]; + +// kwu does not exist +pub const KWA_ROW: &'static [(&str, &str)] = &[ + ("あ","くぁ"),("い","くぃ"),("え","くぇ"),("お","くぉ") +]; + +pub const GA_ROW: &'static [(&str, &str)] = &[ + ("あ","が"),("い","ぎ"),("う","ぐ"),("え","げ"),("お","ご") +]; + +// gyi and gye do not exist +pub const GYA_ROW: &'static [(&str, &str)] = &[ + ("あ","ぎゃ"),("う","ぎゅ"),("お","ぎょ") +]; + +// gwu does not exist +pub const GWA_ROW: &'static [(&str, &str)] = &[ + ("あ","ぐぁ"),("い","ぐぃ"),("え","ぐぇ"),("お","ぐぉ") +]; + +pub const SA_ROW: &'static [(&str, &str)] = &[ + ("あ","さ"),("い","すぃ"),("う","す"),("え","せ"),("お","そ") +]; + +pub const SHA_ROW: &'static [(&str, &str)] = &[ + ("あ","しゃ"),("い","し"),("う","しゅ"),("え","しぇ"),("お","しょ") +]; + +pub const ZA_ROW: &'static [(&str, &str)] = &[ + ("あ","ざ"),("い","ずぃ"),("う","ず"),("え","ぜ"),("お","ぞ") +]; + +pub const JA_ROW: &'static [(&str, &str)] = &[ + ("あ","じゃ"),("い","じ"),("う","じゅ"),("え","じぇ"),("お","じょ") +]; + +pub const TA_ROW: &'static [(&str, &str)] = &[ + ("あ","た"),("い","てぃ"),("う","とぅ"),("え","て"),("お","と") +]; + +pub const DA_ROW: &'static [(&str, &str)] = &[ + ("あ","だ"),("い","でぃ"),("う","どぅ"),("え","で"),("お","ど") +]; + +pub const TSA_ROW: &'static [(&str, &str)] = &[ + ("あ","つぁ"),("い","つぃ"),("う","つ"),("え","つぇ"),("お","つぉ") +]; + +pub const CHA_ROW: &'static [(&str, &str)] = &[ + ("あ","ちゃ"),("い","ち"),("う","ちゅ"),("え","ちぇ"),("お","ちょ") +]; + +pub const NA_ROW: &'static [(&str, &str)] = &[ + ("あ","な"),("い","に"),("う","ぬ"),("え","ね"),("お","の") +]; + +// incomplete + +pub const HA_ROW: &'static [(&str, &str)] = &[ + ("あ","は"),("い","ひ"),("う","ふ"),("え","へ"),("お","ほ") +]; + +pub const FA_ROW: &'static [(&str, &str)] = &[ + ("あ","ふぁ"),("い","ふぃ"),("う","ふ"),("え","ふぇ"),("お","ふぉ") +]; + +pub const BA_ROW: &'static [(&str, &str)] = &[ + ("あ","ば"),("い","び"),("う","ぶ"),("え","べ"),("お","ぼ") +]; + +pub const PA_ROW: &'static [(&str, &str)] = &[ + ("あ","ぱ"),("い","ぴ"),("う","ぷ"),("え","ぺ"),("お","ぽ") +]; + +pub const MA_ROW: &'static [(&str, &str)] = &[ + ("あ","ま"),("い","み"),("う","む"),("え","め"),("お","も") +]; + +pub const RA_ROW: &'static [(&str, &str)] = &[ + ("あ","ら"),("い","り"),("う","る"),("え","れ"),("お","ろ") +]; + +// kye and kyi do not exits +pub const NYA_ROW: &'static [(&str, &str)] = &[ + ("あ","にゃ"),("う","にゅ"),("お","にょ") +]; + +// kye and kyi do not exits +pub const HYA_ROW: &'static [(&str, &str)] = &[ + ("あ","ひゃ"),("う","ひゅ"),("お","ひょ") +]; + +// mye and myi do not exits +pub const MYA_ROW: &'static [(&str, &str)] = &[ + ("あ","きゃ"),("う","きゅ"),("お","きょ") +]; + +// kye and kyi do not exits +pub const RYA_ROW: &'static [(&str, &str)] = &[ + ("あ","りゃ"),("う","りゅ"),("お","りょ") +]; + + +pub const HIRAGANA_TABLE2: &'static [(&str, &'static [(&str, &str)])] = &[ + ("い",A_ROW),("いぃ",YA_ROW),("う",A_ROW),("え",A_ROW),("えぇ",YA_ROW),("お",A_ROW),("か",KA_ROW),("が",GA_ROW),("き",KA_ROW),("きゃ",KYA_ROW),("きゅ",KYA_ROW),("きょ",KYA_ROW),("ぎ",GA_ROW),("ぎゃ",GYA_ROW),("ぎゅ",GYA_ROW),("ぎょ",GYA_ROW),("く",KA_ROW),("くぁ",KWA_ROW),("くぃ",KWA_ROW),("くぇ",KWA_ROW),("くぉ",KWA_ROW),("ぐ",GA_ROW),("ぐぁ",GWA_ROW),("ぐぃ",GWA_ROW),("ぐぇ",GWA_ROW),("ぐぉ",GWA_ROW),("け",KA_ROW),("げ",GA_ROW),("こ",KA_ROW),("ご",KA_ROW),("さ",SA_ROW),("ざ",ZA_ROW),("し",SHA_ROW),("しぇ",SHA_ROW),("しゃ",SHA_ROW),("しゅ",SHA_ROW),("しょ",SHA_ROW),("じ",JA_ROW),("じぇ",JA_ROW),("じゃ",JA_ROW),("じゅ",JA_ROW),("じょ",JA_ROW),("す",SA_ROW),("すぃ",SA_ROW),("ず",ZA_ROW),("ずぃ",ZA_ROW),("せ",SA_ROW),("ぜ",ZA_ROW),("そ",SA_ROW),("ぞ",ZA_ROW),("た",TA_ROW),("だ",DA_ROW),("ち",CHA_ROW),("ちぇ",CHA_ROW),("ちゃ",CHA_ROW),("ちゅ",CHA_ROW),("ちょ",CHA_ROW),("っあ",QA_ROW),("っい",QA_ROW),("っう",QA_ROW),("っえ",QA_ROW),("っお",QA_ROW),("っや",QYA_ROW),("っゆ",QYA_ROW),("っよ",QYA_ROW),("っわ",QWA_ROW),("っゐ",QWA_ROW),("っゑ",QWA_ROW),("っを",QWA_ROW),("つ",TSA_ROW),("つぁ",TSA_ROW),("つぃ",TSA_ROW),("つぇ",TSA_ROW),("つぉ",TSA_ROW),("て",TA_ROW),("てぃ",TA_ROW),("で",DA_ROW),("でぃ",DA_ROW),("と",TA_ROW),("とぅ",TA_ROW),("ど",DA_ROW),("どぅ",DA_ROW),("な",NA_ROW),("に",NA_ROW),("にゃ",NA_ROW),("にゅ",NA_ROW),("にょ",NA_ROW),("ぬ",NA_ROW),("ね",NA_ROW),("の",NA_ROW),("は",HA_ROW),("ば",BA_ROW),("ぱ",PA_ROW),("ひ",HA_ROW),("ひゃ",HYA_ROW),("ひゅ",HYA_ROW),("ひょ",HYA_ROW),("び",BA_ROW),("ぴ",PA_ROW),("ふ",FA_ROW),("ふぁ",FA_ROW),("ふぃ",FA_ROW),("ふぇ",FA_ROW),("ふぉ",FA_ROW),("ぶ",BA_ROW),("ぷ",PA_ROW),("へ",HA_ROW),("べ",BA_ROW),("ぺ",PA_ROW),("ほ",HA_ROW),("ぼ",BA_ROW),("ぽ",PA_ROW),("ま",MA_ROW),("み",MA_ROW),("みゃ",MYA_ROW),("みゅ",MYA_ROW),("みょ",MYA_ROW),("む",MA_ROW),("め",MA_ROW),("も",MA_ROW),("や",YA_ROW),("ゆ",YA_ROW),("よ",YA_ROW),("ら",RA_ROW),("り",RA_ROW),("りゃ",RYA_ROW),("りゅ",RYA_ROW),("りょ",RYA_ROW),("る",RA_ROW),("れ",RA_ROW),("ろ",RA_ROW),("わ",WA_ROW),("ゐ",WA_ROW),("ゑ",WA_ROW),("を",WA_ROW),("をぅ",WA_ROW) +]; + +/* +"あ","い","う","え","お", +"っあ","っい","っう","っえ","っお", +"か","き","く","け","こ", +"が","ぎ","ぐ","げ","ご", +"さ","すぃ","す","せ","そ", +"しゃ","し","しゅ","しぇ","しょ", +"ざ","ずぃ","ず","ぜ","ぞ", +"じゃ","じ","じゅ","じぇ","じょ", +"た","てぃ","とぅ","て","と", +"だ","でぃ","どぅ","で","ど", +"つぁ","つぃ","つ","つぇ","つぉ", +"ちゃ","ち","ちゅ","ちぇ","ちょ", +"な","に","ぬ","ね","の", +"は","ひ","へ","ほ", +"ふぁ","ふぃ","ふ","ふぇ","ふぉ", +"ば","び","ぶ","べ","ぼ", +"ぱ","ぴ","ぷ","ぺ","ぽ", +"ま","み","む","め","も", +"ら","り","る","れ","ろ", +"や","いぃ","ゆ","えぇ","よ", +"わ","ゐ","をぅ","ゑ","を", +"っや","っゆ","っよ", +"っわ","っゐ","っゑ","っを", +"きゃ","きゅ","きょ", +"くぁ","くぃ","くぇ","くぉ", +"ぎゃ","ぎゅ","ぎょ", +"ぐぁ","ぐぃ","ぐぇ","ぐぉ", +"にゃ","にゅ","にょ", +"ひゃ","ひゅ","ひょ", +"みゃ","みゅ","みょ", +"りゃ","りゅ","りょ", +"ん", +"っん" + +"あ","い","う","え","お","っあ","っい","っう","っえ","っお","か","き","く","け","こ","が","ぎ","ぐ","げ","ご","さ","すぃ","す","せ","そ","しゃ","し","しゅ","しぇ","しょ","ざ","ずぃ","ず","ぜ","ぞ","じゃ","じ","じゅ","じぇ","じょ","た","てぃ","とぅ","て","と","だ","でぃ","どぅ","で","ど","つぁ","つぃ","つ","つぇ","つぉ","ちゃ","ち","ちゅ","ちぇ","ちょ","な","に","ぬ","ね","の","は","ひ","へ","ほ","ふぁ","ふぃ","ふ","ふぇ","ふぉ","ば","び","ぶ","べ","ぼ","ぱ","ぴ","ぷ","ぺ","ぽ","ま","み","む","め","も","ら","り","る","れ","ろ","や","いぃ","ゆ","えぇ","よ","わ","ゐ","をぅ","ゑ","を","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","きゃ","きゅ","きょ","くぁ","くぃ","くぇ","くぉ","ぎゃ","ぎゅ","ぎょ","ぐぁ","ぐぃ","ぐぇ","ぐぉ","にゃ","にゅ","にょ","ひゃ","ひゅ","ひょ","みゃ","みゅ","みょ","りゃ","りゅ","りょ","ん","っん" + + + +"あ","い","いぃ","う","え","えぇ","お","か","が","き","きゃ","きゅ","きょ","ぎ","ぎゃ","ぎゅ","ぎょ","く","くぁ","くぃ","くぇ","くぉ","ぐ","ぐぁ","ぐぃ","ぐぇ","ぐぉ","け","げ","こ","ご","さ","ざ","し","しぇ","しゃ","しゅ","しょ","じ","じぇ","じゃ","じゅ","じょ","す","すぃ","ず","ずぃ","せ","ぜ","そ","ぞ","た","だ","ち","ちぇ","ちゃ","ちゅ","ちょ","っあ","っい","っう","っえ","っお","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","っん","つ","つぁ","つぃ","つぇ","つぉ","て","てぃ","で","でぃ","と","とぅ","ど","どぅ","な","に","にゃ","にゅ","にょ","ぬ","ね","の","は","ば","ぱ","ひ","ひゃ","ひゅ","ひょ","び","ぴ","ふ","ふぁ","ふぃ","ふぇ","ふぉ","ぶ","ぷ","へ","べ","ぺ","ほ","ぼ","ぽ","ま","み","みゃ","みゅ","みょ","む","め","も","や","ゆ","よ","ら","り","りゃ","りゅ","りょ","る","れ","ろ","わ","ゐ","ゑ","を","を","ん" +*/ + // 長音符 ー ちょうおんぷ From b4bd9e13ff7e873460de0ebe984940cc8e6c6786 Mon Sep 17 00:00:00 2001 From: James Haver Date: Wed, 14 Mar 2018 14:00:42 +0800 Subject: [PATCH 02/16] Start organizing languages into folders --- src/lib.rs | 7 +- src/okinawa.rs | 428 ++++++++++++++++++++-------------------- src/okinawan/#tests.rs# | 20 ++ src/okinawan/lib.rs | 12 ++ src/okinawan/tables.rs | 194 ++++++++++++++++++ src/okinawan/tests.rs | 20 ++ src/okinawan/util.rs | 219 ++++++++++++++++++++ src/util.rs | 225 +++------------------ 8 files changed, 713 insertions(+), 412 deletions(-) create mode 100644 src/okinawan/#tests.rs# create mode 100644 src/okinawan/lib.rs create mode 100644 src/okinawan/tables.rs create mode 100644 src/okinawan/tests.rs create mode 100644 src/okinawan/util.rs diff --git a/src/lib.rs b/src/lib.rs index e10e75d..4e297d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,9 @@ mod util; pub mod standard; pub mod kansai; -pub mod okinawa; +pub mod okinawan { + pub mod lib; + mod tables; + mod util; + mod tests; +} diff --git a/src/okinawa.rs b/src/okinawa.rs index f3d6c82..799a00f 100644 --- a/src/okinawa.rs +++ b/src/okinawa.rs @@ -1,13 +1,5 @@ use util; -pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ - ("'nn","っん"),("'wa","っわ"),("'we","っゑ"),("'wi","っゐ"),("'wo","っを"),("'ya","っや"),("'yo","っよ"),("'yu","っゆ"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","でぃ"),("do","ど"),("du","どぅ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyi","ぢぃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gwa","ぐゎ"),("gwe","ぐぇ"),("gwi","ぐぃ"),("gwo","ぐぉ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kwa","くゎ"),("kwe","くぇ"),("kwi","くぃ"),("kwo","くぉ"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("n'","ん"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("qnn","っん"),("qwa","っわ"),("qwe","っゑ"),("qwi","っゐ"),("qwo","っを"),("qya","っや"),("qyo","っよ"),("qyu","っゆ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","すぃ"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","てぃ"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tsi","つぃ"),("tso","つぉ"),("tsu","つ"),("tu","とぅ"),("u","う"),("wa","わ"),("we","ゑ"),("wi","ゐ"),("wo","を"),("wu","をぉ"),("xtsu","っ"),("xtu","っ"),("ya","や"),("ye","えぇ"),("yi","いぃ"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zi","ずぃ"),("zo","ぞ"),("zu","ず") -]; - -pub const HIRAGANA_TO_ROMAJI_TABLE: &'static [(&str, &str)] = &[ - ("あ","a"),("い","i"),("いぃ","yi"),("う","u"),("え","e"),("えぇ","ye"),("お","o"),("か","ka"),("が","ga"),("き","ki"),("きゃ","kya"),("きゅ","kyu"),("きょ","kyo"),("ぎ","gi"),("ぎゃ","gya"),("ぎゅ","gyu"),("ぎょ","gyo"),("く","ku"),("くぃ","kwi"),("くぇ","kwe"),("くぉ","kwo"),("くゎ","kwa"),("ぐ","gu"),("ぐぃ","gwi"),("ぐぇ","gwe"),("ぐぉ","gwo"),("ぐゎ","gwa"),("け","ke"),("げ","ge"),("こ","ko"),("ご","go"),("さ","sa"),("ざ","za"),("し","shi"),("しぇ","she"),("しゃ","sha"),("しゅ","shu"),("しょ","sho"),("じ","ji"),("じぇ","je"),("じゃ","ja"),("じゅ","ju"),("じょ","jo"),("す","su"),("すぃ","si"),("ず","zu"),("ずぃ","zi"),("せ","se"),("ぜ","ze"),("そ","so"),("ぞ","zo"),("た","ta"),("だ","da"),("ち","chi"),("ちぇ","che"),("ちゃ","cha"),("ちゅ","chu"),("ちょ","cho"),("ぢぃ","dyi"),("ぢぇ","dye"),("ぢゃ","dya"),("ぢゅ","dyu"),("ぢょ","dyo"),("っや","'ya"),("っゆ","'yu"),("っよ","'yo"),("っわ","'wa"),("っゐ","'wi"),("っゑ","'we"),("っを","'wo"),("っん","'n"),("つ","tsu"),("つぁ","tsa"),("つぃ","tsi"),("つぇ","tse"),("つぉ","tso"),("て","te"),("てぃ","ti"),("で","de"),("でぃ","di"),("と","to"),("とぅ","tu"),("ど","do"),("どぅ","du"),("な","na"),("に","ni"),("にゃ","nya"),("にゅ","nyu"),("にょ","nyo"),("ぬ","nu"),("ね","ne"),("の","no"),("は","ha"),("ば","ba"),("ぱ","pa"),("ひ","hi"),("ひゃ","hya"),("ひゅ","hyu"),("ひょ","hyo"),("び","bi"),("びゃ","bya"),("びゅ","byu"),("びょう","byo"),("ぴ","pi"),("ぴゃ","pya"),("ぴゅ","pyu"),("ぴょお","pyo"),("ふ","fu"),("ふ","fu"),("ふぁ","fa"),("ふぃ","fi"),("ふぇ","fe"),("ふぉ","fo"),("ぶ","bu"),("ぷ","pu"),("へ","he"),("べ","be"),("ぺ","pe"),("ほ","ho"),("ぼ","bo"),("ぽ","po"),("ま","ma"),("み","mi"),("みゃ","mya"),("みゅ","myu"),("みょ","myo"),("む","mu"),("め","me"),("も","mo"),("や","ya"),("ゆ","yu"),("よ","yo"),("ら","ra"),("り","ri"),("りゃ","rya"),("りゅ","ryu"),("りょ","ryo"),("る","ru"),("れ","re"),("ろ","ro"),("わ","wa"),("ゐ","wi"),("ゑ","we"),("を","wo"),("をぉ","wu"),("ん","n") -]; - /* わん I なー you @@ -53,192 +45,7 @@ pub const HIRAGANA_TO_ROMAJI_TABLE: &'static [(&str, &str)] = &[ */ -// if ん and next is ん or あいうえお then n' -// if ん and is b or p then m - -pub fn romaji_to_hiragana(romaji: &str) -> String { - let romaji_len = romaji.len(); - - let mut romaji_index = 0; - let mut window = 1; // 1 to 3 - let mut hiragana = "".to_string(); - - // convert 1 to 3 ascii chars to a single hirgana - // i is starting point, window is length to be looked up - while romaji_index < romaji_len && romaji_index + window < romaji_len + 1 { - let romaji_sub = &romaji[romaji_index .. romaji_index + window]; - - // check for gemminate consonant, if it exists, consume the char - // add a "っ" and skip the lookup - if window == 1 && romaji_index + 1 < romaji_len && util::is_consonant(romaji_sub) { - let romaji_peek_ahead = &romaji[romaji_index + 1 .. romaji_index + 2]; - - if romaji_sub == romaji_peek_ahead { - hiragana = format!("{}{}", hiragana, "っ"); - romaji_index += 1; - window = 1; - continue; - } - } - - match util::lookup(romaji_sub, ROMAJI_TO_HIRAGANA_TABLE) { - Some(hiragana_index) => { - let single_hiragana = ROMAJI_TO_HIRAGANA_TABLE[hiragana_index].1.to_string(); - hiragana = format!("{}{}", hiragana, single_hiragana); - romaji_index += window; - window = 1; - }, - None => { - // romaji_sub was not found in the table, increase the window size if it was - // not found. If the window is too large then move the index over and reset - // the window size to 1 - if window < 3 { - window += 1; - } else { - romaji_index += 1; - window = 1; - } - }, - } - } - - hiragana.to_string() -} -pub fn hiragana_to_romaji(hiragana: &str) -> String { - let hiragana_chars = hiragana.chars(); - let hiragana_len = hiragana_chars.count(); - - let mut hiragana_index = 0; - let mut window = - if hiragana_len > 1 { - 2 - } else { - 1 - }; - let mut romaji = "".to_string(); - - while hiragana_index < hiragana_len && hiragana_index + window < hiragana_len + 1 { - let hiragana_sub_vec = &hiragana.chars().collect::>()[hiragana_index .. hiragana_index + window]; - let hiragana_sub_string: String = hiragana_sub_vec.into_iter().collect(); - let hiragana_sub: &str = &hiragana_sub_string[..]; - - // look ahead - if window == 1 && hiragana_index + 1 < hiragana_len { - let hiragana_peek_ahead_vec = &hiragana.chars().collect::>()[hiragana_index + 1 .. hiragana_index + 2]; - let hiragana_peek_ahead: String = hiragana_peek_ahead_vec.into_iter().collect(); - - // "ん" as "n" or "m" - if hiragana_sub == "ん" { - let nasal = if util::hiragana_is_bilabial(&hiragana_peek_ahead) { - "m".to_string() - } else if util::hiragana_merges_n(&hiragana_peek_ahead) { - "n'".to_string() - } else { - "n".to_string() - }; - - romaji = format!("{}{}", romaji, nasal); - hiragana_index += window; - window = - if hiragana_len - hiragana_index > 1 { - 2 - } else { - 1 - }; - continue; - } - - // check for gemminate consonant, if it exists, consume the char - // add the geminnated consonant and skip the lookup - if hiragana_sub == "っ" { - let geminate = match util::lookup(&hiragana_peek_ahead, util::HIRAGANA_TO_GEMINATE_TABLE) { - Some(geminate_index) => util::HIRAGANA_TO_GEMINATE_TABLE[geminate_index].1.to_string(), - None => "".to_string() - }; - romaji = format!("{}{}", romaji, geminate); - hiragana_index += window; - window = - if hiragana_len - hiragana_index > 1 { - 2 - } else { - 1 - }; - continue; - } - } - - if window == 2 { - if hiragana_sub_vec[1] == 'ー' { - match util::lookup(&hiragana_sub_vec[0].to_string(), util::HIRAGANA_TO_VOWEL_TABLE) { - Some(vowel_index) => { - match util::lookup(&hiragana_sub_vec[0].to_string(), HIRAGANA_TO_ROMAJI_TABLE) { - Some(romaji_index) => { - let prev = HIRAGANA_TO_ROMAJI_TABLE[romaji_index].1.to_string(); - let hiragana_vowel = util::HIRAGANA_TO_VOWEL_TABLE[vowel_index].1.to_string(); - match util::lookup(&hiragana_vowel, HIRAGANA_TO_ROMAJI_TABLE) { - Some(romaji_vowel_index) => { - let romaji_vowel = HIRAGANA_TO_ROMAJI_TABLE[romaji_vowel_index].1.to_string(); - romaji = format!("{}{}{}", romaji, prev, romaji_vowel); - hiragana_index += window; - window = - if hiragana_len - hiragana_index > 1 { - 2 - } else { - 1 - }; - continue; - }, - None => { - () - } - } - }, - None => { - () - } - } - }, - None => { - () - } - } - } - } - - match util::lookup(hiragana_sub, HIRAGANA_TO_ROMAJI_TABLE) { - Some(romaji_index) => { - let single_romaji = HIRAGANA_TO_ROMAJI_TABLE[romaji_index].1.to_string(); - romaji = format!("{}{}", romaji, single_romaji); - hiragana_index += window; - window = - if hiragana_len - hiragana_index > 1 { - 2 - } else { - 1 - }; - }, - None => { - // hiragana_sub was not found in the table, increase the window size if it was - // not found. If the window is too large then move the index over and reset - // the window size relative to the length of string. - if window > 1 { - window -= 1; - } else { - hiragana_index += 1; - window = - if hiragana_len - hiragana_index > 1 { - 2 - } else { - 1 - }; - } - }, - } - } - - romaji.to_string() -} /* import Text.Show.Unicode (uprint) import Data.List (sort) @@ -484,19 +291,21 @@ pub enum VerbTypes { // jun = in pub enum VerbStem { + // 非過去否定 Base, // 基本語幹 base - // 基本語幹+a : N(否定), riiN(可能・受身), suN(使役) - // 基本語幹+ee : 条件形, 命令形 - // 基本語幹+i : 命令形 - // 基本語幹+u : na(な。禁止), ka(まで), kazirii(まで・かぎり) + // 基本語幹+a : N(否定), riiN(可能・受身), suN(使役), a/wa ば, あ列 + // 基本語幹+ee : 条件形, 命令形 え列 + // 基本語幹+i : 命令形 い列 + // 基本語幹+u : na(な。禁止), ka(まで), kazirii(まで・かぎり) う列 Connective, // 連用語幹 connective - // 連用語幹+i : ga(〜しに), ciroo(〜しそう), uusuN(〜できる), busaN(〜したい) - // 連用語幹+(j)abiiN/ibiiN : + // 連用語幹+i : ga(〜しに), ciroo(〜しそう), uusuN(〜できる), busaN(〜したい) // い列 + // 連用語幹+(j)abiiN/ibiiN : // あ列 い列 // 連用形 をり - Derivative, // 派生語幹 derivative stem + // 非過去 + Derivative, // 派生語幹 derivative stem u/i/○ // 派生語幹+uN/iN/N : 終止形(現在形) // 派生語幹+uru/iru/ru : baa(〜時)、hazi(〜はず), ru(ぞ) // 派生語幹+ura/ira/ra : 疑問の助詞ga(か) @@ -506,11 +315,12 @@ pub enum VerbStem { // 派生語幹+uti/iti/ti : 〜していたか = 過去進行中止形 // 派生語幹+uteeN/iteeN/teeN : 〜していただろう = 過去進行推量形 - Euphonic // 音便語幹 euphonic change stem - // 音便語幹+i : 〜して - // 音便語幹+aN : 〜した - // 音便語幹+eeN : (今までに)きっと〜している,〜したに違いない,〜してある - // 音便語幹+ooN : 〜している + // + Euphonic // 音便語幹 euphonic change stem  + // 音便語幹+i : 〜して い列 + // 音便語幹+aN : 〜した あ列 + // 音便語幹+eeN : (今までに)きっと〜している,〜したに違いない,〜してある え列 + // 音便語幹+ooN : 〜している お列 } pub enum VerbConjugation { @@ -551,6 +361,183 @@ pub enum VerbConjugation { InterrogativePastPoliteII // bitiiga } +/* +pub fn replace_last(x: &str, y: &str) -> String { + format!("{}{}", truncate_chars(x, x.chars().count()-1), y) +} + +util::lookups(a,b, util::HIRAGA_TABLE_TO_ROW) + + let hiragana_sub_vec = &hiragana.chars().collect::>()[hiragana_index .. hiragana_index + window]; + let hiragana_sub_string: String = hiragana_sub_vec.into_iter().collect(); + let hiragana_sub: &str = &hiragana_sub_string[..]; + + */ +/* +pub fn rl(verb: &str, y: &str) -> String { + let length = y.chars().count(); + let last: String = (&y.chars().collect::>()[length-1 .. length]).into_iter().collect(); + let new = util::lookups(last,"あ", util::HIRAGA_TABLE_TO_ROW) +} + +基本語幹 連用語幹 派生語幹 音便語幹 +那覇方言の第1種動詞の語幹 +書く ka k c c c kacuN(書く)、cicuN(聞く)、sacuN(咲く)、ʔaQcuN(歩く) +漕ぐ kuu g z z z kuuzuN(漕ぐ)、ʔwiizuN(泳ぐ)、ʔoozuN(扇ぐ) +立つ ta t c c Qc tacuN(立つ)、ʔucuN(打つ)、kacuN(勝つ) +育つ sura t c c c suracuN(育つ)、tamucuN(保つ)、kucuN(朽ちる) +殺す kuru s s s c kurusuN(殺す)、meesuN(燃やす)、haNsuN(外す) +為る ‐ s s s s suN(為る)、siQkwasuN(敷く)、hiQkoosuN(比較する) +呼ぶ ju b b b r/d jubuN(呼ぶ)、tubuN(飛ぶ)、musubuN(結ぶ) +読む ju m m m r/d jumuN(読む)、numuN(飲む)、ʔamuN(編む) +眠る niN r z z t niNzuN(眠る)、kaNzuN(被る)、ʔaNzuN(あぶる) +見る NN r z z c NNzuN(見る)、kuNzuN(括る) +死ぬ shi n n n j shinuN 死ぬん + +那覇方言の第2種動詞の語幹 +取る tu r ○/j ○ Qt tuiN(取る) +刈る ka r ○/j ○ t kaiN(刈る)、nubuiN(登る)、ʔaraiN(洗う) +蹴る ki r ○/j ○ Qc kiiN(蹴る)、ʔiiN(入る)、hiiN(放る)、ciiN(切る) +煮る ni r ○/j ○ c/(Qt) niiN(煮る)、ciiN(着る)、ʔiiN(言う)、iiN(座る) + +那覇方言の第3種動詞の語幹 +有る ʔa r ○/j/i ○ t ʔaN(有る)、uN(居る)、jaN(である) +*/ + +pub fn base_stem(verb: &str, vt: VerbTypes) -> String { + use self::VerbTypes::*; + match vt { + IV => "".to_string(), + III => format!("{}ら", truncate_chars(verb, verb.chars().count() - 2)), + II1 | II2 | II3 | II4 => format!("{}ら", truncate_chars(verb, verb.chars().count() - 2)), + I1 => format!("{}か", truncate_chars(verb, verb.chars().count() - 2)), + I2 => format!("{}が", truncate_chars(verb, verb.chars().count() - 2)), + I3 => format!("{}た", truncate_chars(verb, verb.chars().count() - 2)), + I4 => format!("{}た", truncate_chars(verb, verb.chars().count() - 2)), + I5 => format!("{}さ", truncate_chars(verb, verb.chars().count() - 2)), + I6 => format!("{}さ", truncate_chars(verb, verb.chars().count() - 2)), + I7 => format!("{}ば", truncate_chars(verb, verb.chars().count() - 2)), + I8 => format!("{}ま", truncate_chars(verb, verb.chars().count() - 2)), + I9 => format!("{}ら", truncate_chars(verb, verb.chars().count() - 2)), + I10 => format!("{}ら", truncate_chars(verb, verb.chars().count() - 2)), + } +} + +pub fn connective_stem(verb: &str, vt: VerbTypes) -> String { + use self::VerbTypes::*; + match vt { + IV => "".to_string(), + III => format!("{}あ", truncate_chars(verb, verb.chars().count() - 2)), + II1 | II2 | II3 | II4 => format!("{}あ", truncate_chars(verb, verb.chars().count() - 2)), + I1 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I2 => format!("{}じゃ", truncate_chars(verb, verb.chars().count() - 2)), + I3 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I4 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I5 => format!("{}さ", truncate_chars(verb, verb.chars().count() - 2)), + I6 => format!("{}さ", truncate_chars(verb, verb.chars().count() - 2)), + I7 => format!("{}ば", truncate_chars(verb, verb.chars().count() - 2)), + I8 => format!("{}ま", truncate_chars(verb, verb.chars().count() - 2)), + I9 => format!("{}じゃ", truncate_chars(verb, verb.chars().count() - 2)), + I10 => format!("{}じゃ", truncate_chars(verb, verb.chars().count() - 2)), + } +} + +pub fn derivative_stem(verb: &str, vt: VerbTypes) -> String { + use self::VerbTypes::*; + match vt { + IV => "".to_string(), + III => format!("{}あ", truncate_chars(verb, verb.chars().count() - 2)), + II1 | II2 | II3 | II4 => format!("{}あ", truncate_chars(verb, verb.chars().count() - 2)), + I1 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I2 => format!("{}じゃ", truncate_chars(verb, verb.chars().count() - 2)), + I3 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I4 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I5 => format!("{}さ", truncate_chars(verb, verb.chars().count() - 2)), + I6 => format!("{}さ", truncate_chars(verb, verb.chars().count() - 2)), + I7 => format!("{}ば", truncate_chars(verb, verb.chars().count() - 2)), + I8 => format!("{}ま", truncate_chars(verb, verb.chars().count() - 2)), + I9 => format!("{}じゃ", truncate_chars(verb, verb.chars().count() - 2)), + I10 => format!("{}じゃ", truncate_chars(verb, verb.chars().count() - 2)), + } +} + +pub fn euphonic_stem(verb: &str, vt: VerbTypes) -> String { + use self::VerbTypes::*; + match vt { + IV => "".to_string(), + III => format!("{}た", truncate_chars(verb, verb.chars().count() - 2)), + II1 | II2 | II3 | II4 => format!("{}ら", truncate_chars(verb, verb.chars().count() - 2)), + I1 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I2 => format!("{}じゃ", truncate_chars(verb, verb.chars().count() - 2)), + I3 => format!("{}っちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I4 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I5 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + I6 => format!("{}さ", truncate_chars(verb, verb.chars().count() - 2)), + I7 => format!("{}ら", truncate_chars(verb, verb.chars().count() - 2)), + I8 => format!("{}ら", truncate_chars(verb, verb.chars().count() - 2)), + I9 => format!("{}た", truncate_chars(verb, verb.chars().count() - 2)), + I10 => format!("{}ちゃ", truncate_chars(verb, verb.chars().count() - 2)), + } +} + +pub fn chars_sub_str(s: &str, start: usize, end: usize) -> String { + let s_sub_vec = &s.chars().collect::>()[start .. end]; + let s_sub_string: String = s_sub_vec.into_iter().collect(); + s_sub_string +} + +pub fn chars_split(s: &str, split: usize) -> (String,String) { + let l_sub_vec = &s.chars().collect::>()[0 .. split]; + let l: String = l_sub_vec.into_iter().collect(); + + let r_sub_vec = &s.chars().collect::>()[split .. s.chars().count()]; + let r: String = r_sub_vec.into_iter().collect(); + + (l,r) +} + +pub fn replace_last_with_vowel(word: &str, vowel: &str) -> String { + let word_len = word.chars().count(); + if word_len > 1 { + let (left, right) = chars_split(word, word_len-2); + + match util::lookups_string(&right[..], vowel, util::HIRAGANA_TO_ROW_TABLE) { + Some(tail) => { + format!("{}{}", left, tail) + }, + None => { + let (left, right) = chars_split(word, word_len-1); + + match util::lookups_string(&right[..], vowel, util::HIRAGANA_TO_ROW_TABLE) { + Some(tail) => format!("{}{}", left, tail), + None => word.to_string() + } + } + } + } else if word_len == 1 { + let (left, right) = chars_split(word, word_len-1); + + match util::lookups_string(&right[..], vowel, util::HIRAGANA_TO_ROW_TABLE) { + Some(tail) => format!("{}{}", left, tail), + None => word.to_string() + } + } else { + word.to_string() + } + + // let hiragana_sub_vec = &word.chars().collect::>()[hiragana_index .. hiragana_index + window]; + // let hiragana_sub_string: String = hiragana_sub_vec.into_iter().collect(); + // let hiragana_sub: &str = &hiragana_sub_string[..]; +} + +pub fn conjugate_verb(verb: &str, vt: VerbTypes, conjugation: VerbConjugation) -> String { + use self::VerbConjugation::*; + match conjugation { + NonPast => verb.to_string(), + NonPastNegative => base_stem(verb, vt), + _ => verb.to_string() + } +} pub fn conjugate_iv_verbs(verb: &str, conjugation: VerbConjugation) -> &str { if verb == "chuun" { @@ -824,13 +811,18 @@ mod tests { assert_eq!(get_verb_stem("jumuN",VerbStem::Euphonic,VerbTypes::I8), "jur".to_string()); } + #[test] + fn test_conjugate_verb() { + assert_eq!(conjugate_verb("とぅいん",VerbTypes::II2, VerbConjugation::NonPastNegative), "とぅら".to_string()); + } + #[test] fn test_iv() { } #[test] - fn test_conjugate_verb() { + fn test_conjugate_verbs() { assert_eq!(conjugate_verbs("jaN",VerbTypes::III, VerbConjugation::NonPastNegative), "jaraN".to_string()); // conjugations assert_eq!(conjugate_verbs("kacuN",VerbTypes::I1, VerbConjugation::NonPastNegative), "kakaN".to_string()); @@ -980,6 +972,21 @@ mod tests { assert_eq!(conjugate_adj("しじか", AdjType::Yan, AdjConjugation::NonPast), "しじかやん".to_string()); } + + #[test] + fn test_replace_last_with_vowel() { + assert_eq!(replace_last_with_vowel("かた","あ"), "かた".to_string()); + assert_eq!(replace_last_with_vowel("かた","い"), "かてぃ".to_string()); + assert_eq!(replace_last_with_vowel("かた","う"), "かとぅ".to_string()); + assert_eq!(replace_last_with_vowel("かた","え"), "かて".to_string()); + assert_eq!(replace_last_with_vowel("かた","お"), "かと".to_string()); + + assert_eq!(replace_last_with_vowel("かちゃ","あ"), "かちゃ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","い"), "かち".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","う"), "かちゅ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","え"), "かちぇ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","お"), "かちょ".to_string()); + } } /* @@ -1006,15 +1013,6 @@ eat もーいん【舞いん】 もーやー【舞やー】 mooyaa "dancer". -那覇方言の第2種動詞の語幹 -取る tu r ○/j ○ Qt tuiN(取る) -刈る ka r ○/j ○ t kaiN(刈る)、nubuiN(登る)、ʔaraiN(洗う) -蹴る ki r ○/j ○ Qc kiiN(蹴る)、ʔiiN(入る)、hiiN(放る)、ciiN(切る) -煮る ni r ○/j ○ c/(Qt) niiN(煮る)、ciiN(着る)、ʔiiN(言う)、iiN(座る) - -那覇方言の第3種動詞の語幹 -有る ʔa r ○/j/i ○ t ʔaN(有る)、uN(居る)、jaN(である) - Ia1 (tuin,take),(wakain,understand) Ia2 warain, laugh Ib1 chiin, don (clothes) diff --git a/src/okinawan/#tests.rs# b/src/okinawan/#tests.rs# new file mode 100644 index 0000000..787c985 --- /dev/null +++ b/src/okinawan/#tests.rs# @@ -0,0 +1,20 @@ +use okinawan::util::*; + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_replace_last_with_vowel() { + assert_eq!(replace_last_with_vowel("かた","あ"), "かた".to_string()); + assert_eq!(replace_last_with_vowel("かた","い"), "かてぃ".to_string()); + assert_eq!(replace_last_with_vowel("かた","う"), "かとぅ".to_string()); + assert_eq!(replace_last_with_vowel("かた","え"), "かて".to_string()); + assert_eq!(replace_last_with_vowel("かた","お"), "かと".to_string()); + + assert_eq!(replace_last_with_vowel("かちゃ","あ"), "かちゃ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","い"), "かち".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","う"), "かちゅ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","え"), "かちぇ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","お"), "かちょ".to_string()); + } +} diff --git a/src/okinawan/lib.rs b/src/okinawan/lib.rs new file mode 100644 index 0000000..2011898 --- /dev/null +++ b/src/okinawan/lib.rs @@ -0,0 +1,12 @@ +use util as u; +use okinawan::util as uu; +// use okinawan::util as uu; +// mod util; + +pub fn truncate_chars(s: &str, max_chars: usize) -> &str { + match s.char_indices().nth(max_chars) { + None => s, + Some((idx, _)) => &s[..idx], + } +} + diff --git a/src/okinawan/tables.rs b/src/okinawan/tables.rs new file mode 100644 index 0000000..89960ba --- /dev/null +++ b/src/okinawan/tables.rs @@ -0,0 +1,194 @@ + +// okinawan hepburn romaji +pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ + ("'nn","っん"),("'wa","っわ"),("'we","っゑ"),("'wi","っゐ"),("'wo","っを"),("'ya","っや"),("'yo","っよ"),("'yu","っゆ"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","でぃ"),("do","ど"),("du","どぅ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyi","ぢぃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gwa","ぐゎ"),("gwe","ぐぇ"),("gwi","ぐぃ"),("gwo","ぐぉ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kwa","くゎ"),("kwe","くぇ"),("kwi","くぃ"),("kwo","くぉ"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("n'","ん"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("qnn","っん"),("qwa","っわ"),("qwe","っゑ"),("qwi","っゐ"),("qwo","っを"),("qya","っや"),("qyo","っよ"),("qyu","っゆ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","すぃ"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","てぃ"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tsi","つぃ"),("tso","つぉ"),("tsu","つ"),("tu","とぅ"),("u","う"),("wa","わ"),("we","ゑ"),("wi","ゐ"),("wo","を"),("wu","をぉ"),("xtsu","っ"),("xtu","っ"),("ya","や"),("ye","えぇ"),("yi","いぃ"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zi","ずぃ"),("zo","ぞ"),("zu","ず") +]; + +// okinawan hiragana +pub const HIRAGANA_TO_ROMAJI_TABLE: &'static [(&str, &str)] = &[ + ("あ","a"),("い","i"),("いぃ","yi"),("う","u"),("え","e"),("えぇ","ye"),("お","o"),("か","ka"),("が","ga"),("き","ki"),("きゃ","kya"),("きゅ","kyu"),("きょ","kyo"),("ぎ","gi"),("ぎゃ","gya"),("ぎゅ","gyu"),("ぎょ","gyo"),("く","ku"),("くぃ","kwi"),("くぇ","kwe"),("くぉ","kwo"),("くゎ","kwa"),("ぐ","gu"),("ぐぃ","gwi"),("ぐぇ","gwe"),("ぐぉ","gwo"),("ぐゎ","gwa"),("け","ke"),("げ","ge"),("こ","ko"),("ご","go"),("さ","sa"),("ざ","za"),("し","shi"),("しぇ","she"),("しゃ","sha"),("しゅ","shu"),("しょ","sho"),("じ","ji"),("じぇ","je"),("じゃ","ja"),("じゅ","ju"),("じょ","jo"),("す","su"),("すぃ","si"),("ず","zu"),("ずぃ","zi"),("せ","se"),("ぜ","ze"),("そ","so"),("ぞ","zo"),("た","ta"),("だ","da"),("ち","chi"),("ちぇ","che"),("ちゃ","cha"),("ちゅ","chu"),("ちょ","cho"),("ぢぃ","dyi"),("ぢぇ","dye"),("ぢゃ","dya"),("ぢゅ","dyu"),("ぢょ","dyo"),("っや","'ya"),("っゆ","'yu"),("っよ","'yo"),("っわ","'wa"),("っゐ","'wi"),("っゑ","'we"),("っを","'wo"),("っん","'n"),("つ","tsu"),("つぁ","tsa"),("つぃ","tsi"),("つぇ","tse"),("つぉ","tso"),("て","te"),("てぃ","ti"),("で","de"),("でぃ","di"),("と","to"),("とぅ","tu"),("ど","do"),("どぅ","du"),("な","na"),("に","ni"),("にゃ","nya"),("にゅ","nyu"),("にょ","nyo"),("ぬ","nu"),("ね","ne"),("の","no"),("は","ha"),("ば","ba"),("ぱ","pa"),("ひ","hi"),("ひゃ","hya"),("ひゅ","hyu"),("ひょ","hyo"),("び","bi"),("びゃ","bya"),("びゅ","byu"),("びょう","byo"),("ぴ","pi"),("ぴゃ","pya"),("ぴゅ","pyu"),("ぴょお","pyo"),("ふ","fu"),("ふ","fu"),("ふぁ","fa"),("ふぃ","fi"),("ふぇ","fe"),("ふぉ","fo"),("ぶ","bu"),("ぷ","pu"),("へ","he"),("べ","be"),("ぺ","pe"),("ほ","ho"),("ぼ","bo"),("ぽ","po"),("ま","ma"),("み","mi"),("みゃ","mya"),("みゅ","myu"),("みょ","myo"),("む","mu"),("め","me"),("も","mo"),("や","ya"),("ゆ","yu"),("よ","yo"),("ら","ra"),("り","ri"),("りゃ","rya"),("りゅ","ryu"),("りょ","ryo"),("る","ru"),("れ","re"),("ろ","ro"),("わ","wa"),("ゐ","wi"),("ゑ","we"),("を","wo"),("をぉ","wu"),("ん","n") +]; + +// hiragana rows + +pub const A_ROW: &'static [(&str, &str)] = &[ + ("あ","あ"),("い","い"),("う","う"),("え","え"),("お","お") +]; + +pub const YA_ROW: &'static [(&str, &str)] = &[ + ("あ","や"),("い","いぃ"),("う","ゆ"),("え","いぇ"),("お","よ") +]; + +pub const WA_ROW: &'static [(&str, &str)] = &[ + ("あ","わ"),("い","ゐ"),("う","をぅ"),("え","ゑ"),("お","を") +]; + +// qyi and qye do not exist +pub const QYA_ROW: &'static [(&str, &str)] = &[ + ("あ","っや"),("う","っゆ"),("お","っよ") +]; + +// qwu does not exist +pub const QWA_ROW: &'static [(&str, &str)] = &[ + ("あ","っわ"),("い","っゐ"),("え","っゑ"),("お","っを") +]; + +pub const QA_ROW: &'static [(&str, &str)] = &[ + ("あ","っあ"),("い","っい"),("う","っう"),("え","っえ"),("お","っお") +]; + +pub const KA_ROW: &'static [(&str, &str)] = &[ + ("あ","か"),("い","き"),("う","く"),("え","け"),("お","こ") +]; + +// kye and kyi do not exits +pub const KYA_ROW: &'static [(&str, &str)] = &[ + ("あ","きゃ"),("う","きゅ"),("お","きょ") +]; + +// kwu does not exist +pub const KWA_ROW: &'static [(&str, &str)] = &[ + ("あ","くぁ"),("い","くぃ"),("え","くぇ"),("お","くぉ") +]; + +pub const GA_ROW: &'static [(&str, &str)] = &[ + ("あ","が"),("い","ぎ"),("う","ぐ"),("え","げ"),("お","ご") +]; + +// gyi and gye do not exist +pub const GYA_ROW: &'static [(&str, &str)] = &[ + ("あ","ぎゃ"),("う","ぎゅ"),("お","ぎょ") +]; + +// gwu does not exist +pub const GWA_ROW: &'static [(&str, &str)] = &[ + ("あ","ぐぁ"),("い","ぐぃ"),("え","ぐぇ"),("お","ぐぉ") +]; + +pub const SA_ROW: &'static [(&str, &str)] = &[ + ("あ","さ"),("い","すぃ"),("う","す"),("え","せ"),("お","そ") +]; + +pub const SHA_ROW: &'static [(&str, &str)] = &[ + ("あ","しゃ"),("い","し"),("う","しゅ"),("え","しぇ"),("お","しょ") +]; + +pub const ZA_ROW: &'static [(&str, &str)] = &[ + ("あ","ざ"),("い","ずぃ"),("う","ず"),("え","ぜ"),("お","ぞ") +]; + +pub const JA_ROW: &'static [(&str, &str)] = &[ + ("あ","じゃ"),("い","じ"),("う","じゅ"),("え","じぇ"),("お","じょ") +]; + +pub const TA_ROW: &'static [(&str, &str)] = &[ + ("あ","た"),("い","てぃ"),("う","とぅ"),("え","て"),("お","と") +]; + +pub const DA_ROW: &'static [(&str, &str)] = &[ + ("あ","だ"),("い","でぃ"),("う","どぅ"),("え","で"),("お","ど") +]; + +pub const TSA_ROW: &'static [(&str, &str)] = &[ + ("あ","つぁ"),("い","つぃ"),("う","つ"),("え","つぇ"),("お","つぉ") +]; + +pub const CHA_ROW: &'static [(&str, &str)] = &[ + ("あ","ちゃ"),("い","ち"),("う","ちゅ"),("え","ちぇ"),("お","ちょ") +]; + +pub const NA_ROW: &'static [(&str, &str)] = &[ + ("あ","な"),("い","に"),("う","ぬ"),("え","ね"),("お","の") +]; + +pub const HA_ROW: &'static [(&str, &str)] = &[ + ("あ","は"),("い","ひ"),("う","ふ"),("え","へ"),("お","ほ") +]; + +pub const FA_ROW: &'static [(&str, &str)] = &[ + ("あ","ふぁ"),("い","ふぃ"),("う","ふ"),("え","ふぇ"),("お","ふぉ") +]; + +pub const BA_ROW: &'static [(&str, &str)] = &[ + ("あ","ば"),("い","び"),("う","ぶ"),("え","べ"),("お","ぼ") +]; + +pub const PA_ROW: &'static [(&str, &str)] = &[ + ("あ","ぱ"),("い","ぴ"),("う","ぷ"),("え","ぺ"),("お","ぽ") +]; + +pub const MA_ROW: &'static [(&str, &str)] = &[ + ("あ","ま"),("い","み"),("う","む"),("え","め"),("お","も") +]; + +pub const RA_ROW: &'static [(&str, &str)] = &[ + ("あ","ら"),("い","り"),("う","る"),("え","れ"),("お","ろ") +]; + +// kye and kyi do not exits +pub const NYA_ROW: &'static [(&str, &str)] = &[ + ("あ","にゃ"),("う","にゅ"),("お","にょ") +]; + +// kye and kyi do not exits +pub const HYA_ROW: &'static [(&str, &str)] = &[ + ("あ","ひゃ"),("う","ひゅ"),("お","ひょ") +]; + +// mye and myi do not exits +pub const MYA_ROW: &'static [(&str, &str)] = &[ + ("あ","きゃ"),("う","きゅ"),("お","きょ") +]; + +// kye and kyi do not exits +pub const RYA_ROW: &'static [(&str, &str)] = &[ + ("あ","りゃ"),("う","りゅ"),("お","りょ") +]; + +pub const HIRAGANA_TO_ROW_TABLE: &'static [(&str, &'static [(&str, &str)])] = &[ + ("い",A_ROW),("いぃ",YA_ROW),("う",A_ROW),("え",A_ROW),("えぇ",YA_ROW),("お",A_ROW),("か",KA_ROW),("が",GA_ROW),("き",KA_ROW),("きゃ",KYA_ROW),("きゅ",KYA_ROW),("きょ",KYA_ROW),("ぎ",GA_ROW),("ぎゃ",GYA_ROW),("ぎゅ",GYA_ROW),("ぎょ",GYA_ROW),("く",KA_ROW),("くぁ",KWA_ROW),("くぃ",KWA_ROW),("くぇ",KWA_ROW),("くぉ",KWA_ROW),("ぐ",GA_ROW),("ぐぁ",GWA_ROW),("ぐぃ",GWA_ROW),("ぐぇ",GWA_ROW),("ぐぉ",GWA_ROW),("け",KA_ROW),("げ",GA_ROW),("こ",KA_ROW),("ご",KA_ROW),("さ",SA_ROW),("ざ",ZA_ROW),("し",SHA_ROW),("しぇ",SHA_ROW),("しゃ",SHA_ROW),("しゅ",SHA_ROW),("しょ",SHA_ROW),("じ",JA_ROW),("じぇ",JA_ROW),("じゃ",JA_ROW),("じゅ",JA_ROW),("じょ",JA_ROW),("す",SA_ROW),("すぃ",SA_ROW),("ず",ZA_ROW),("ずぃ",ZA_ROW),("せ",SA_ROW),("ぜ",ZA_ROW),("そ",SA_ROW),("ぞ",ZA_ROW),("た",TA_ROW),("だ",DA_ROW),("ち",CHA_ROW),("ちぇ",CHA_ROW),("ちゃ",CHA_ROW),("ちゅ",CHA_ROW),("ちょ",CHA_ROW),("っあ",QA_ROW),("っい",QA_ROW),("っう",QA_ROW),("っえ",QA_ROW),("っお",QA_ROW),("っや",QYA_ROW),("っゆ",QYA_ROW),("っよ",QYA_ROW),("っわ",QWA_ROW),("っゐ",QWA_ROW),("っゑ",QWA_ROW),("っを",QWA_ROW),("つ",TSA_ROW),("つぁ",TSA_ROW),("つぃ",TSA_ROW),("つぇ",TSA_ROW),("つぉ",TSA_ROW),("て",TA_ROW),("てぃ",TA_ROW),("で",DA_ROW),("でぃ",DA_ROW),("と",TA_ROW),("とぅ",TA_ROW),("ど",DA_ROW),("どぅ",DA_ROW),("な",NA_ROW),("に",NA_ROW),("にゃ",NA_ROW),("にゅ",NA_ROW),("にょ",NA_ROW),("ぬ",NA_ROW),("ね",NA_ROW),("の",NA_ROW),("は",HA_ROW),("ば",BA_ROW),("ぱ",PA_ROW),("ひ",HA_ROW),("ひゃ",HYA_ROW),("ひゅ",HYA_ROW),("ひょ",HYA_ROW),("び",BA_ROW),("ぴ",PA_ROW),("ふ",FA_ROW),("ふぁ",FA_ROW),("ふぃ",FA_ROW),("ふぇ",FA_ROW),("ふぉ",FA_ROW),("ぶ",BA_ROW),("ぷ",PA_ROW),("へ",HA_ROW),("べ",BA_ROW),("ぺ",PA_ROW),("ほ",HA_ROW),("ぼ",BA_ROW),("ぽ",PA_ROW),("ま",MA_ROW),("み",MA_ROW),("みゃ",MYA_ROW),("みゅ",MYA_ROW),("みょ",MYA_ROW),("む",MA_ROW),("め",MA_ROW),("も",MA_ROW),("や",YA_ROW),("ゆ",YA_ROW),("よ",YA_ROW),("ら",RA_ROW),("り",RA_ROW),("りゃ",RYA_ROW),("りゅ",RYA_ROW),("りょ",RYA_ROW),("る",RA_ROW),("れ",RA_ROW),("ろ",RA_ROW),("わ",WA_ROW),("ゐ",WA_ROW),("ゑ",WA_ROW),("を",WA_ROW),("をぅ",WA_ROW) +]; + +// 長音符 ー ちょうおんぷ + +/* +"あ","い","う","え","お", +"っあ","っい","っう","っえ","っお", +"か","き","く","け","こ", +"が","ぎ","ぐ","げ","ご", +"さ","すぃ","す","せ","そ", +"しゃ","し","しゅ","しぇ","しょ", +"ざ","ずぃ","ず","ぜ","ぞ", +"じゃ","じ","じゅ","じぇ","じょ", +"た","てぃ","とぅ","て","と", +"だ","でぃ","どぅ","で","ど", +"つぁ","つぃ","つ","つぇ","つぉ", +"ちゃ","ち","ちゅ","ちぇ","ちょ", +"な","に","ぬ","ね","の", +"は","ひ","へ","ほ", +"ふぁ","ふぃ","ふ","ふぇ","ふぉ", +"ば","び","ぶ","べ","ぼ", +"ぱ","ぴ","ぷ","ぺ","ぽ", +"ま","み","む","め","も", +"ら","り","る","れ","ろ", +"や","いぃ","ゆ","えぇ","よ", +"わ","ゐ","をぅ","ゑ","を", +"っや","っゆ","っよ", +"っわ","っゐ","っゑ","っを", +"きゃ","きゅ","きょ", +"くぁ","くぃ","くぇ","くぉ", +"ぎゃ","ぎゅ","ぎょ", +"ぐぁ","ぐぃ","ぐぇ","ぐぉ", +"にゃ","にゅ","にょ", +"ひゃ","ひゅ","ひょ", +"みゃ","みゅ","みょ", +"りゃ","りゅ","りょ", +"ん", +"っん" + +"あ","い","う","え","お","っあ","っい","っう","っえ","っお","か","き","く","け","こ","が","ぎ","ぐ","げ","ご","さ","すぃ","す","せ","そ","しゃ","し","しゅ","しぇ","しょ","ざ","ずぃ","ず","ぜ","ぞ","じゃ","じ","じゅ","じぇ","じょ","た","てぃ","とぅ","て","と","だ","でぃ","どぅ","で","ど","つぁ","つぃ","つ","つぇ","つぉ","ちゃ","ち","ちゅ","ちぇ","ちょ","な","に","ぬ","ね","の","は","ひ","へ","ほ","ふぁ","ふぃ","ふ","ふぇ","ふぉ","ば","び","ぶ","べ","ぼ","ぱ","ぴ","ぷ","ぺ","ぽ","ま","み","む","め","も","ら","り","る","れ","ろ","や","いぃ","ゆ","えぇ","よ","わ","ゐ","をぅ","ゑ","を","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","きゃ","きゅ","きょ","くぁ","くぃ","くぇ","くぉ","ぎゃ","ぎゅ","ぎょ","ぐぁ","ぐぃ","ぐぇ","ぐぉ","にゃ","にゅ","にょ","ひゃ","ひゅ","ひょ","みゃ","みゅ","みょ","りゃ","りゅ","りょ","ん","っん" + + + +"あ","い","いぃ","う","え","えぇ","お","か","が","き","きゃ","きゅ","きょ","ぎ","ぎゃ","ぎゅ","ぎょ","く","くぁ","くぃ","くぇ","くぉ","ぐ","ぐぁ","ぐぃ","ぐぇ","ぐぉ","け","げ","こ","ご","さ","ざ","し","しぇ","しゃ","しゅ","しょ","じ","じぇ","じゃ","じゅ","じょ","す","すぃ","ず","ずぃ","せ","ぜ","そ","ぞ","た","だ","ち","ちぇ","ちゃ","ちゅ","ちょ","っあ","っい","っう","っえ","っお","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","っん","つ","つぁ","つぃ","つぇ","つぉ","て","てぃ","で","でぃ","と","とぅ","ど","どぅ","な","に","にゃ","にゅ","にょ","ぬ","ね","の","は","ば","ぱ","ひ","ひゃ","ひゅ","ひょ","び","ぴ","ふ","ふぁ","ふぃ","ふぇ","ふぉ","ぶ","ぷ","へ","べ","ぺ","ほ","ぼ","ぽ","ま","み","みゃ","みゅ","みょ","む","め","も","や","ゆ","よ","ら","り","りゃ","りゅ","りょ","る","れ","ろ","わ","ゐ","ゑ","を","を","ん" +*/ diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs new file mode 100644 index 0000000..787c985 --- /dev/null +++ b/src/okinawan/tests.rs @@ -0,0 +1,20 @@ +use okinawan::util::*; + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_replace_last_with_vowel() { + assert_eq!(replace_last_with_vowel("かた","あ"), "かた".to_string()); + assert_eq!(replace_last_with_vowel("かた","い"), "かてぃ".to_string()); + assert_eq!(replace_last_with_vowel("かた","う"), "かとぅ".to_string()); + assert_eq!(replace_last_with_vowel("かた","え"), "かて".to_string()); + assert_eq!(replace_last_with_vowel("かた","お"), "かと".to_string()); + + assert_eq!(replace_last_with_vowel("かちゃ","あ"), "かちゃ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","い"), "かち".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","う"), "かちゅ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","え"), "かちぇ".to_string()); + assert_eq!(replace_last_with_vowel("かちゃ","お"), "かちょ".to_string()); + } +} diff --git a/src/okinawan/util.rs b/src/okinawan/util.rs new file mode 100644 index 0000000..42c0927 --- /dev/null +++ b/src/okinawan/util.rs @@ -0,0 +1,219 @@ +use okinawan::tables::*; +use util; + +// if ん and next is ん or あいうえお then n' +// if ん and is b or p then m + +pub fn romaji_to_hiragana(romaji: &str) -> String { + let romaji_len = romaji.len(); + + let mut romaji_index = 0; + let mut window = 1; // 1 to 3 + let mut hiragana = "".to_string(); + + // convert 1 to 3 ascii chars to a single hirgana + // i is starting point, window is length to be looked up + while romaji_index < romaji_len && romaji_index + window < romaji_len + 1 { + let romaji_sub = &romaji[romaji_index .. romaji_index + window]; + + // check for gemminate consonant, if it exists, consume the char + // add a "っ" and skip the lookup + if window == 1 && romaji_index + 1 < romaji_len && util::is_consonant(romaji_sub) { + let romaji_peek_ahead = &romaji[romaji_index + 1 .. romaji_index + 2]; + + if romaji_sub == romaji_peek_ahead { + hiragana = format!("{}{}", hiragana, "っ"); + romaji_index += 1; + window = 1; + continue; + } + } + + match util::lookup(romaji_sub, ROMAJI_TO_HIRAGANA_TABLE) { + Some(hiragana_index) => { + let single_hiragana = ROMAJI_TO_HIRAGANA_TABLE[hiragana_index].1.to_string(); + hiragana = format!("{}{}", hiragana, single_hiragana); + romaji_index += window; + window = 1; + }, + None => { + // romaji_sub was not found in the table, increase the window size if it was + // not found. If the window is too large then move the index over and reset + // the window size to 1 + if window < 3 { + window += 1; + } else { + romaji_index += 1; + window = 1; + } + }, + } + } + + hiragana.to_string() +} + +pub fn hiragana_to_romaji(hiragana: &str) -> String { + let hiragana_chars = hiragana.chars(); + let hiragana_len = hiragana_chars.count(); + + let mut hiragana_index = 0; + let mut window = + if hiragana_len > 1 { + 2 + } else { + 1 + }; + let mut romaji = "".to_string(); + + while hiragana_index < hiragana_len && hiragana_index + window < hiragana_len + 1 { + let hiragana_sub_vec = &hiragana.chars().collect::>()[hiragana_index .. hiragana_index + window]; + let hiragana_sub_string: String = hiragana_sub_vec.into_iter().collect(); + let hiragana_sub: &str = &hiragana_sub_string[..]; + + // look ahead + if window == 1 && hiragana_index + 1 < hiragana_len { + let hiragana_peek_ahead_vec = &hiragana.chars().collect::>()[hiragana_index + 1 .. hiragana_index + 2]; + let hiragana_peek_ahead: String = hiragana_peek_ahead_vec.into_iter().collect(); + + // "ん" as "n" or "m" + if hiragana_sub == "ん" { + let nasal = if util::hiragana_is_bilabial(&hiragana_peek_ahead) { + "m".to_string() + } else if util::hiragana_merges_n(&hiragana_peek_ahead) { + "n'".to_string() + } else { + "n".to_string() + }; + + romaji = format!("{}{}", romaji, nasal); + hiragana_index += window; + window = + if hiragana_len - hiragana_index > 1 { + 2 + } else { + 1 + }; + continue; + } + + // check for gemminate consonant, if it exists, consume the char + // add the geminnated consonant and skip the lookup + if hiragana_sub == "っ" { + let geminate = match util::lookup(&hiragana_peek_ahead, util::HIRAGANA_TO_GEMINATE_TABLE) { + Some(geminate_index) => util::HIRAGANA_TO_GEMINATE_TABLE[geminate_index].1.to_string(), + None => "".to_string() + }; + romaji = format!("{}{}", romaji, geminate); + hiragana_index += window; + window = + if hiragana_len - hiragana_index > 1 { + 2 + } else { + 1 + }; + continue; + } + } + + if window == 2 { + if hiragana_sub_vec[1] == 'ー' { + match util::lookup(&hiragana_sub_vec[0].to_string(), util::HIRAGANA_TO_VOWEL_TABLE) { + Some(vowel_index) => { + match util::lookup(&hiragana_sub_vec[0].to_string(), HIRAGANA_TO_ROMAJI_TABLE) { + Some(romaji_index) => { + let prev = HIRAGANA_TO_ROMAJI_TABLE[romaji_index].1.to_string(); + let hiragana_vowel = util::HIRAGANA_TO_VOWEL_TABLE[vowel_index].1.to_string(); + match util::lookup(&hiragana_vowel, HIRAGANA_TO_ROMAJI_TABLE) { + Some(romaji_vowel_index) => { + let romaji_vowel = HIRAGANA_TO_ROMAJI_TABLE[romaji_vowel_index].1.to_string(); + romaji = format!("{}{}{}", romaji, prev, romaji_vowel); + hiragana_index += window; + window = + if hiragana_len - hiragana_index > 1 { + 2 + } else { + 1 + }; + continue; + }, + None => { + () + } + } + }, + None => { + () + } + } + }, + None => { + () + } + } + } + } + + match util::lookup(hiragana_sub, HIRAGANA_TO_ROMAJI_TABLE) { + Some(romaji_index) => { + let single_romaji = HIRAGANA_TO_ROMAJI_TABLE[romaji_index].1.to_string(); + romaji = format!("{}{}", romaji, single_romaji); + hiragana_index += window; + window = + if hiragana_len - hiragana_index > 1 { + 2 + } else { + 1 + }; + }, + None => { + // hiragana_sub was not found in the table, increase the window size if it was + // not found. If the window is too large then move the index over and reset + // the window size relative to the length of string. + if window > 1 { + window -= 1; + } else { + hiragana_index += 1; + window = + if hiragana_len - hiragana_index > 1 { + 2 + } else { + 1 + }; + } + }, + } + } + + romaji.to_string() +} + +pub fn replace_last_with_vowel(word: &str, vowel: &str) -> String { + let word_len = word.chars().count(); + if word_len > 1 { + let (left, right) = util::chars_split(word, word_len-2); + + match util::lookups_string(&right[..], vowel, HIRAGANA_TO_ROW_TABLE) { + Some(tail) => { + format!("{}{}", left, tail) + }, + None => { + let (left, right) = util::chars_split(word, word_len-1); + + match util::lookups_string(&right[..], vowel, HIRAGANA_TO_ROW_TABLE) { + Some(tail) => format!("{}{}", left, tail), + None => word.to_string() + } + } + } + } else if word_len == 1 { + let (left, right) = util::chars_split(word, word_len-1); + + match util::lookups_string(&right[..], vowel, HIRAGANA_TO_ROW_TABLE) { + Some(tail) => format!("{}{}", left, tail), + None => word.to_string() + } + } else { + word.to_string() + } +} diff --git a/src/util.rs b/src/util.rs index 5ea4a86..6b22b0e 100644 --- a/src/util.rs +++ b/src/util.rs @@ -12,6 +12,17 @@ pub fn lookup(s: &str, table: &'static [(&str,&str)]) -> Option { } } +pub fn lookup_string(s: &str, table: &'static [(&str,&str)]) -> Option { + match table.binary_search_by(|&(key, _)| { + if s == key { Equal } + else if key < s { Less } + else { Greater } + }) { + Ok(i) => Some(table[i].1.to_string()), + Err(_) => None, + } +} + pub fn lookups(s: &str,x: &str, table: &'static [(&str, &'static [(&str,&str)])]) -> Option { match table.binary_search_by(|&(key, _)| { if s == key { Equal } @@ -23,6 +34,17 @@ pub fn lookups(s: &str,x: &str, table: &'static [(&str, &'static [(&str,&str)])] } } +pub fn lookups_string(s: &str,x: &str, table: &'static [(&str, &'static [(&str,&str)])]) -> Option { + match table.binary_search_by(|&(key, _)| { + if s == key { Equal } + else if key < s { Less } + else { Greater } + }) { + Ok(i) => lookup_string(x, table[i].1), + Err(_) => None, + } +} + pub fn is_consonant(s: &str) -> bool { s.len() == 1 && !["a","e","i","n","o","u","y"].contains(&s) } @@ -47,201 +69,12 @@ pub const HIRAGANA_TO_GEMINATE_TABLE: &'static [(&str, &str)] = &[ ("ぁ",""),("あ",""),("ぃ",""),("い",""),("ぅ",""),("う",""),("ぇ",""),("え",""),("ぉ",""),("お",""),("か","k"),("が","g"),("き","k"),("ぎ","g"),("く","k"),("ぐ","g"),("け","k"),("げ","g"),("こ","k"),("ご","g"),("さ","s"),("ざ","z"),("し","s"),("じ","j"),("す","s"),("ず","z"),("せ","s"),("ぜ","z"),("そ","s"),("ぞ","z"),("た","t"),("だ","d"),("ち","t"),("ぢ","d"),("つ","t"),("づ","d"),("て","t"),("で","d"),("と","t"),("ど","d"),("な","n"),("に","n"),("ぬ","n"),("ね","n"),("の","n"),("は","h"),("ば","b"),("ぱ","p"),("ひ","h"),("び","b"),("ぴ","p"),("ふ","f"),("ぶ","b"),("ぷ","p"),("へ","h"),("べ","b"),("ぺ","b"),("ほ","h"),("ぼ","b"),("ぽ","p"),("ま","m"),("み","m"),("む","m"),("め","m"),("も","m"),("ゃ","y"),("や","y"),("ゅ","y"),("ゆ","y"),("ょ","y"),("よ","y"),("ら","r"),("り","r"),("る","r"),("れ","r"),("ろ","r"),("わ","w"),("ゐ","w"),("ゑ","w"),("を","w"),("ゔ","v"),("ゕ","k"),("ゖ","k") ]; +pub fn chars_split(s: &str, split: usize) -> (String,String) { + let l_sub_vec = &s.chars().collect::>()[0 .. split]; + let l: String = l_sub_vec.into_iter().collect(); -pub const HIRAGANA_TABLE: &'static [(&str, &'static [(&str, &str)])] = &[ - ("あ",&[("あ","あ"),("い","い"),("う","う"),("え","え"),("お","お")]), - ("か",&[("あ","か"),("い","き"),("う","く"),("え","け"),("お","こ")]), - ("さ",&[("あ","さ"),("い","し"),("う","す"),("え","せ"),("お","そ")]), - ("た",&[("あ","た"),("い","ち"),("う","つ"),("え","て"),("お","と")]), - ("な",&[("あ","な"),("い","に"),("う","ぬ"),("え","ね"),("お","の")]), - ("は",&[("あ","は"),("い","ひ"),("う","ふ"),("え","へ"),("お","ほ")]), - ("ま",&[("あ","ま"),("い","み"),("う","む"),("え","め"),("お","も")]), - ("や",&[("あ","や"),("い","いぃ"),("う","ゆ"),("え","いぇ"),("お","よ")]), - ("ら",&[("あ","ら"),("い","り"),("う","る"),("え","れ"),("お","ろ")]), - ("わ",&[("あ","わ"),("い","ゐ"),("う","う"),("え","ゑ"),("お","を")]) -]; - -pub const A_ROW: &'static [(&str, &str)] = &[ - ("あ","あ"),("い","い"),("う","う"),("え","え"),("お","お") -]; - -pub const YA_ROW: &'static [(&str, &str)] = &[ - ("あ","や"),("い","いぃ"),("う","ゆ"),("え","いぇ"),("お","よ") -]; - -pub const WA_ROW: &'static [(&str, &str)] = &[ - ("あ","わ"),("い","ゐ"),("う","をぅ"),("え","ゑ"),("お","を") -]; - -// qyi and qye do not exist -pub const QYA_ROW: &'static [(&str, &str)] = &[ - ("あ","っや"),("う","っゆ"),("お","っよ") -]; - -// qwu does not exist -pub const QWA_ROW: &'static [(&str, &str)] = &[ - ("あ","っわ"),("い","っゐ"),("え","っゑ"),("お","っを") -]; - -pub const QA_ROW: &'static [(&str, &str)] = &[ - ("あ","っあ"),("い","っい"),("う","っう"),("え","っえ"),("お","っお") -]; - -pub const KA_ROW: &'static [(&str, &str)] = &[ - ("あ","か"),("い","き"),("う","く"),("え","け"),("お","こ") -]; - -// kye and kyi do not exits -pub const KYA_ROW: &'static [(&str, &str)] = &[ - ("あ","きゃ"),("う","きゅ"),("お","きょ") -]; + let r_sub_vec = &s.chars().collect::>()[split .. s.chars().count()]; + let r: String = r_sub_vec.into_iter().collect(); -// kwu does not exist -pub const KWA_ROW: &'static [(&str, &str)] = &[ - ("あ","くぁ"),("い","くぃ"),("え","くぇ"),("お","くぉ") -]; - -pub const GA_ROW: &'static [(&str, &str)] = &[ - ("あ","が"),("い","ぎ"),("う","ぐ"),("え","げ"),("お","ご") -]; - -// gyi and gye do not exist -pub const GYA_ROW: &'static [(&str, &str)] = &[ - ("あ","ぎゃ"),("う","ぎゅ"),("お","ぎょ") -]; - -// gwu does not exist -pub const GWA_ROW: &'static [(&str, &str)] = &[ - ("あ","ぐぁ"),("い","ぐぃ"),("え","ぐぇ"),("お","ぐぉ") -]; - -pub const SA_ROW: &'static [(&str, &str)] = &[ - ("あ","さ"),("い","すぃ"),("う","す"),("え","せ"),("お","そ") -]; - -pub const SHA_ROW: &'static [(&str, &str)] = &[ - ("あ","しゃ"),("い","し"),("う","しゅ"),("え","しぇ"),("お","しょ") -]; - -pub const ZA_ROW: &'static [(&str, &str)] = &[ - ("あ","ざ"),("い","ずぃ"),("う","ず"),("え","ぜ"),("お","ぞ") -]; - -pub const JA_ROW: &'static [(&str, &str)] = &[ - ("あ","じゃ"),("い","じ"),("う","じゅ"),("え","じぇ"),("お","じょ") -]; - -pub const TA_ROW: &'static [(&str, &str)] = &[ - ("あ","た"),("い","てぃ"),("う","とぅ"),("え","て"),("お","と") -]; - -pub const DA_ROW: &'static [(&str, &str)] = &[ - ("あ","だ"),("い","でぃ"),("う","どぅ"),("え","で"),("お","ど") -]; - -pub const TSA_ROW: &'static [(&str, &str)] = &[ - ("あ","つぁ"),("い","つぃ"),("う","つ"),("え","つぇ"),("お","つぉ") -]; - -pub const CHA_ROW: &'static [(&str, &str)] = &[ - ("あ","ちゃ"),("い","ち"),("う","ちゅ"),("え","ちぇ"),("お","ちょ") -]; - -pub const NA_ROW: &'static [(&str, &str)] = &[ - ("あ","な"),("い","に"),("う","ぬ"),("え","ね"),("お","の") -]; - -// incomplete - -pub const HA_ROW: &'static [(&str, &str)] = &[ - ("あ","は"),("い","ひ"),("う","ふ"),("え","へ"),("お","ほ") -]; - -pub const FA_ROW: &'static [(&str, &str)] = &[ - ("あ","ふぁ"),("い","ふぃ"),("う","ふ"),("え","ふぇ"),("お","ふぉ") -]; - -pub const BA_ROW: &'static [(&str, &str)] = &[ - ("あ","ば"),("い","び"),("う","ぶ"),("え","べ"),("お","ぼ") -]; - -pub const PA_ROW: &'static [(&str, &str)] = &[ - ("あ","ぱ"),("い","ぴ"),("う","ぷ"),("え","ぺ"),("お","ぽ") -]; - -pub const MA_ROW: &'static [(&str, &str)] = &[ - ("あ","ま"),("い","み"),("う","む"),("え","め"),("お","も") -]; - -pub const RA_ROW: &'static [(&str, &str)] = &[ - ("あ","ら"),("い","り"),("う","る"),("え","れ"),("お","ろ") -]; - -// kye and kyi do not exits -pub const NYA_ROW: &'static [(&str, &str)] = &[ - ("あ","にゃ"),("う","にゅ"),("お","にょ") -]; - -// kye and kyi do not exits -pub const HYA_ROW: &'static [(&str, &str)] = &[ - ("あ","ひゃ"),("う","ひゅ"),("お","ひょ") -]; - -// mye and myi do not exits -pub const MYA_ROW: &'static [(&str, &str)] = &[ - ("あ","きゃ"),("う","きゅ"),("お","きょ") -]; - -// kye and kyi do not exits -pub const RYA_ROW: &'static [(&str, &str)] = &[ - ("あ","りゃ"),("う","りゅ"),("お","りょ") -]; - - -pub const HIRAGANA_TABLE2: &'static [(&str, &'static [(&str, &str)])] = &[ - ("い",A_ROW),("いぃ",YA_ROW),("う",A_ROW),("え",A_ROW),("えぇ",YA_ROW),("お",A_ROW),("か",KA_ROW),("が",GA_ROW),("き",KA_ROW),("きゃ",KYA_ROW),("きゅ",KYA_ROW),("きょ",KYA_ROW),("ぎ",GA_ROW),("ぎゃ",GYA_ROW),("ぎゅ",GYA_ROW),("ぎょ",GYA_ROW),("く",KA_ROW),("くぁ",KWA_ROW),("くぃ",KWA_ROW),("くぇ",KWA_ROW),("くぉ",KWA_ROW),("ぐ",GA_ROW),("ぐぁ",GWA_ROW),("ぐぃ",GWA_ROW),("ぐぇ",GWA_ROW),("ぐぉ",GWA_ROW),("け",KA_ROW),("げ",GA_ROW),("こ",KA_ROW),("ご",KA_ROW),("さ",SA_ROW),("ざ",ZA_ROW),("し",SHA_ROW),("しぇ",SHA_ROW),("しゃ",SHA_ROW),("しゅ",SHA_ROW),("しょ",SHA_ROW),("じ",JA_ROW),("じぇ",JA_ROW),("じゃ",JA_ROW),("じゅ",JA_ROW),("じょ",JA_ROW),("す",SA_ROW),("すぃ",SA_ROW),("ず",ZA_ROW),("ずぃ",ZA_ROW),("せ",SA_ROW),("ぜ",ZA_ROW),("そ",SA_ROW),("ぞ",ZA_ROW),("た",TA_ROW),("だ",DA_ROW),("ち",CHA_ROW),("ちぇ",CHA_ROW),("ちゃ",CHA_ROW),("ちゅ",CHA_ROW),("ちょ",CHA_ROW),("っあ",QA_ROW),("っい",QA_ROW),("っう",QA_ROW),("っえ",QA_ROW),("っお",QA_ROW),("っや",QYA_ROW),("っゆ",QYA_ROW),("っよ",QYA_ROW),("っわ",QWA_ROW),("っゐ",QWA_ROW),("っゑ",QWA_ROW),("っを",QWA_ROW),("つ",TSA_ROW),("つぁ",TSA_ROW),("つぃ",TSA_ROW),("つぇ",TSA_ROW),("つぉ",TSA_ROW),("て",TA_ROW),("てぃ",TA_ROW),("で",DA_ROW),("でぃ",DA_ROW),("と",TA_ROW),("とぅ",TA_ROW),("ど",DA_ROW),("どぅ",DA_ROW),("な",NA_ROW),("に",NA_ROW),("にゃ",NA_ROW),("にゅ",NA_ROW),("にょ",NA_ROW),("ぬ",NA_ROW),("ね",NA_ROW),("の",NA_ROW),("は",HA_ROW),("ば",BA_ROW),("ぱ",PA_ROW),("ひ",HA_ROW),("ひゃ",HYA_ROW),("ひゅ",HYA_ROW),("ひょ",HYA_ROW),("び",BA_ROW),("ぴ",PA_ROW),("ふ",FA_ROW),("ふぁ",FA_ROW),("ふぃ",FA_ROW),("ふぇ",FA_ROW),("ふぉ",FA_ROW),("ぶ",BA_ROW),("ぷ",PA_ROW),("へ",HA_ROW),("べ",BA_ROW),("ぺ",PA_ROW),("ほ",HA_ROW),("ぼ",BA_ROW),("ぽ",PA_ROW),("ま",MA_ROW),("み",MA_ROW),("みゃ",MYA_ROW),("みゅ",MYA_ROW),("みょ",MYA_ROW),("む",MA_ROW),("め",MA_ROW),("も",MA_ROW),("や",YA_ROW),("ゆ",YA_ROW),("よ",YA_ROW),("ら",RA_ROW),("り",RA_ROW),("りゃ",RYA_ROW),("りゅ",RYA_ROW),("りょ",RYA_ROW),("る",RA_ROW),("れ",RA_ROW),("ろ",RA_ROW),("わ",WA_ROW),("ゐ",WA_ROW),("ゑ",WA_ROW),("を",WA_ROW),("をぅ",WA_ROW) -]; - -/* -"あ","い","う","え","お", -"っあ","っい","っう","っえ","っお", -"か","き","く","け","こ", -"が","ぎ","ぐ","げ","ご", -"さ","すぃ","す","せ","そ", -"しゃ","し","しゅ","しぇ","しょ", -"ざ","ずぃ","ず","ぜ","ぞ", -"じゃ","じ","じゅ","じぇ","じょ", -"た","てぃ","とぅ","て","と", -"だ","でぃ","どぅ","で","ど", -"つぁ","つぃ","つ","つぇ","つぉ", -"ちゃ","ち","ちゅ","ちぇ","ちょ", -"な","に","ぬ","ね","の", -"は","ひ","へ","ほ", -"ふぁ","ふぃ","ふ","ふぇ","ふぉ", -"ば","び","ぶ","べ","ぼ", -"ぱ","ぴ","ぷ","ぺ","ぽ", -"ま","み","む","め","も", -"ら","り","る","れ","ろ", -"や","いぃ","ゆ","えぇ","よ", -"わ","ゐ","をぅ","ゑ","を", -"っや","っゆ","っよ", -"っわ","っゐ","っゑ","っを", -"きゃ","きゅ","きょ", -"くぁ","くぃ","くぇ","くぉ", -"ぎゃ","ぎゅ","ぎょ", -"ぐぁ","ぐぃ","ぐぇ","ぐぉ", -"にゃ","にゅ","にょ", -"ひゃ","ひゅ","ひょ", -"みゃ","みゅ","みょ", -"りゃ","りゅ","りょ", -"ん", -"っん" - -"あ","い","う","え","お","っあ","っい","っう","っえ","っお","か","き","く","け","こ","が","ぎ","ぐ","げ","ご","さ","すぃ","す","せ","そ","しゃ","し","しゅ","しぇ","しょ","ざ","ずぃ","ず","ぜ","ぞ","じゃ","じ","じゅ","じぇ","じょ","た","てぃ","とぅ","て","と","だ","でぃ","どぅ","で","ど","つぁ","つぃ","つ","つぇ","つぉ","ちゃ","ち","ちゅ","ちぇ","ちょ","な","に","ぬ","ね","の","は","ひ","へ","ほ","ふぁ","ふぃ","ふ","ふぇ","ふぉ","ば","び","ぶ","べ","ぼ","ぱ","ぴ","ぷ","ぺ","ぽ","ま","み","む","め","も","ら","り","る","れ","ろ","や","いぃ","ゆ","えぇ","よ","わ","ゐ","をぅ","ゑ","を","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","きゃ","きゅ","きょ","くぁ","くぃ","くぇ","くぉ","ぎゃ","ぎゅ","ぎょ","ぐぁ","ぐぃ","ぐぇ","ぐぉ","にゃ","にゅ","にょ","ひゃ","ひゅ","ひょ","みゃ","みゅ","みょ","りゃ","りゅ","りょ","ん","っん" - - - -"あ","い","いぃ","う","え","えぇ","お","か","が","き","きゃ","きゅ","きょ","ぎ","ぎゃ","ぎゅ","ぎょ","く","くぁ","くぃ","くぇ","くぉ","ぐ","ぐぁ","ぐぃ","ぐぇ","ぐぉ","け","げ","こ","ご","さ","ざ","し","しぇ","しゃ","しゅ","しょ","じ","じぇ","じゃ","じゅ","じょ","す","すぃ","ず","ずぃ","せ","ぜ","そ","ぞ","た","だ","ち","ちぇ","ちゃ","ちゅ","ちょ","っあ","っい","っう","っえ","っお","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","っん","つ","つぁ","つぃ","つぇ","つぉ","て","てぃ","で","でぃ","と","とぅ","ど","どぅ","な","に","にゃ","にゅ","にょ","ぬ","ね","の","は","ば","ぱ","ひ","ひゃ","ひゅ","ひょ","び","ぴ","ふ","ふぁ","ふぃ","ふぇ","ふぉ","ぶ","ぷ","へ","べ","ぺ","ほ","ぼ","ぽ","ま","み","みゃ","みゅ","みょ","む","め","も","や","ゆ","よ","ら","り","りゃ","りゅ","りょ","る","れ","ろ","わ","ゐ","ゑ","を","を","ん" -*/ - -// 長音符 ー ちょうおんぷ + (l,r) +} From db61a10eca2b967da244f8c78d085cbf8353cdf5 Mon Sep 17 00:00:00 2001 From: James Haver Date: Sun, 18 Mar 2018 06:33:06 +0800 Subject: [PATCH 03/16] Progress on Hiragana based conjugation --- src/okinawan/#tests.rs# | 20 ---- src/okinawan/lib.rs | 221 ++++++++++++++++++++++++++++++++++++++-- src/okinawan/tables.rs | 9 +- src/okinawan/tests.rs | 60 ++++++++++- src/okinawan/util.rs | 28 +++++ src/util.rs | 19 ++++ 6 files changed, 327 insertions(+), 30 deletions(-) delete mode 100644 src/okinawan/#tests.rs# diff --git a/src/okinawan/#tests.rs# b/src/okinawan/#tests.rs# deleted file mode 100644 index 787c985..0000000 --- a/src/okinawan/#tests.rs# +++ /dev/null @@ -1,20 +0,0 @@ -use okinawan::util::*; - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn test_replace_last_with_vowel() { - assert_eq!(replace_last_with_vowel("かた","あ"), "かた".to_string()); - assert_eq!(replace_last_with_vowel("かた","い"), "かてぃ".to_string()); - assert_eq!(replace_last_with_vowel("かた","う"), "かとぅ".to_string()); - assert_eq!(replace_last_with_vowel("かた","え"), "かて".to_string()); - assert_eq!(replace_last_with_vowel("かた","お"), "かと".to_string()); - - assert_eq!(replace_last_with_vowel("かちゃ","あ"), "かちゃ".to_string()); - assert_eq!(replace_last_with_vowel("かちゃ","い"), "かち".to_string()); - assert_eq!(replace_last_with_vowel("かちゃ","う"), "かちゅ".to_string()); - assert_eq!(replace_last_with_vowel("かちゃ","え"), "かちぇ".to_string()); - assert_eq!(replace_last_with_vowel("かちゃ","お"), "かちょ".to_string()); - } -} diff --git a/src/okinawan/lib.rs b/src/okinawan/lib.rs index 2011898..85c3fa4 100644 --- a/src/okinawan/lib.rs +++ b/src/okinawan/lib.rs @@ -1,12 +1,221 @@ -use util as u; -use okinawan::util as uu; +use util::{truncate_chars, split_chars_at, lookups_string}; +use okinawan::tables::{HIRAGANA_TO_ROW_TABLE}; +use okinawan::util::{remove_last_mora}; // use okinawan::util as uu; // mod util; -pub fn truncate_chars(s: &str, max_chars: usize) -> &str { - match s.char_indices().nth(max_chars) { - None => s, - Some((idx, _)) => &s[..idx], +pub enum VerbType { + I1, + I2, + I3, + I4, + I5, + I6, + I7, + I8, + I9, + I10, + II1, + II2, + II3, + II4, + III, + IV +} + +pub enum VerbStem { + // 非過去否定 + Base, // 基本語幹 base + // 基本語幹+a : N(否定), riiN(可能・受身), suN(使役), a/wa ば, あ列 + // 基本語幹+ee : 条件形, 命令形 え列 + // 基本語幹+i : 命令形 い列 + // 基本語幹+u : na(な。禁止), ka(まで), kazirii(まで・かぎり) う列 + + Connective, // 連用語幹 connective + // 連用語幹+i : ga(〜しに), ciroo(〜しそう), uusuN(〜できる), busaN(〜したい) // い列 + // 連用語幹+(j)abiiN/ibiiN : // あ列 い列 + + // 連用形 をり + + // 非過去 + Derivative, // 派生語幹 derivative stem u/i/○ + // 派生語幹+uN/iN/N : 終止形(現在形) + // 派生語幹+uru/iru/ru : baa(〜時)、hazi(〜はず), ru(ぞ) + // 派生語幹+ura/ira/ra : 疑問の助詞ga(か) + // 派生語幹+uraa/iraa/raa : 「〜なら」という条件を表す。 + // 派生語幹+u/i/○ : si(の)、siga(〜のだが)、sa(よ)、gutu(理由)、ga(疑問)、mi・i(たずね) + // 派生語幹+utaN/itaN/taN : 〜していた = 過去進行形 + // 派生語幹+uti/iti/ti : 〜していたか = 過去進行中止形 + // 派生語幹+uteeN/iteeN/teeN : 〜していただろう = 過去進行推量形 + + // + Euphonic // 音便語幹 euphonic change stem  + // 音便語幹+i : 〜して い列 + // 音便語幹+aN : 〜した あ列 + // 音便語幹+eeN : (今までに)きっと〜している,〜したに違いない,〜してある え列 + // 音便語幹+ooN : 〜している お列 +} + +pub enum VerbConjugation { + NonPast, // in/un/n 辞書形 + NonPastNegative, // ~an 否定形 + Past, // ~an + PastNegative, // ~antan + + // ClauseEnding, // i, does/ and 連用形 + // Connective, // (y)a + + + YesNoInterrogative, // ~mi + WhInterrogative, // ~ga + + Honorific, + Potential, // able to ~juusun + Desiderative, // desire, want to + Imperative, + Prohibitive, // prohibitive + + Volitional, + Causative, // ~sun + Passive, // riiN rijuN + Continuative, // ti form + AttributiveNonPast, // N -> ru + AttributivePast, // N -> ru + Progressive, // + + Gerund, // 音便語幹+i : 〜して, ti ティ形 + NonPastPolite, // biin + NonPastNegativePolite, // biran + InterrogativePolite, // biimi + InterrogativePoliteII, // biiga + PastPolite, // bitan + PastNegativePoilte, // birantan + InterrogativePastPolite, // bitii + InterrogativePastPoliteII // bitiiga +} + +fn remove_last_two_moras(s: &str) -> String { + let first = remove_last_mora(s); + remove_last_mora(&first) +} + +pub fn base_stem(verb: &str, vt: VerbType) -> String { + let bare_stem = remove_last_two_moras(verb); + use self::VerbType::*; + match vt { + IV => "".to_string(), + III | II1 | II2 | II3 | II4 => format!("{}ら", bare_stem), + I1 => format!("{}か", bare_stem), + I2 => format!("{}が", bare_stem), + I3 => format!("{}た", bare_stem), + I4 => format!("{}た", bare_stem), + I5 => format!("{}さ", bare_stem), + I6 => format!("{}さ", bare_stem), + I7 => format!("{}ば", bare_stem), + I8 => format!("{}ま", bare_stem), + I9 => format!("{}ら", bare_stem), + I10 => format!("{}ら", bare_stem) } } +pub fn connective_stem(verb: &str, vt: VerbType) -> String { + let bare_stem = remove_last_two_moras(verb); + use self::VerbType::*; + match vt { + IV => "".to_string(), + III | II1 | II2 | II3 | II4 => format!("{}あ", bare_stem), + I1 => format!("{}ちゃ", bare_stem), + I2 => format!("{}じゃ", bare_stem), + I3 => format!("{}ちゃ", bare_stem), + I4 => format!("{}ちゃ", bare_stem), + I5 => format!("{}さ", bare_stem), + I6 => format!("{}さ", bare_stem), + I7 => format!("{}ば", bare_stem), + I8 => format!("{}ま", bare_stem), + I9 => format!("{}じゃ", bare_stem), + I10 => format!("{}じゃ", bare_stem) + } +} + +pub fn derivative_stem(verb: &str, vt: VerbType) -> String { + let bare_stem = remove_last_two_moras(verb); + use self::VerbType::*; + match vt { + IV => "".to_string(), + III | II1 | II2 | II3 | II4 => format!("{}あ", bare_stem), + I1 => format!("{}ちゃ", bare_stem), + I2 => format!("{}じゃ", bare_stem), + I3 => format!("{}ちゃ", bare_stem), + I4 => format!("{}ちゃ", bare_stem), + I5 => format!("{}さ", bare_stem), + I6 => format!("{}さ", bare_stem), + I7 => format!("{}ば", bare_stem), + I8 => format!("{}ま", bare_stem), + I9 => format!("{}じゃ", bare_stem), + I10 => format!("{}じゃ", bare_stem) + } +} + +pub fn euphonic_stem(verb: &str, vt: VerbType) -> String { + let bare_stem = remove_last_two_moras(verb); + use self::VerbType::*; + match vt { + IV => "".to_string(), + III => format!("{}た", bare_stem), + II1 | II2 | II3 | II4 => format!("{}ら", bare_stem), + I1 => format!("{}ちゃ", bare_stem), + I2 => format!("{}じゃ", bare_stem), + I3 => format!("{}っちゃ", bare_stem), + I4 => format!("{}ちゃ", bare_stem), + I5 => format!("{}ちゃ", bare_stem), + I6 => format!("{}さ", bare_stem), + I7 => format!("{}ら", bare_stem), + I8 => format!("{}ら", bare_stem), + I9 => format!("{}た", bare_stem), + I10 => format!("{}ちゃ", bare_stem), + } +} + +pub fn replace_last_with_vowel(word: &str, vowel: &str) -> String { + let word_len = word.chars().count(); + if word_len > 1 { + let (left, right) = split_chars_at(word, word_len-2); + + match lookups_string(&right[..], vowel, HIRAGANA_TO_ROW_TABLE) { + Some(tail) => { + format!("{}{}", left, tail) + }, + None => { + let (left, right) = split_chars_at(word, word_len-1); + + match lookups_string(&right[..], vowel, HIRAGANA_TO_ROW_TABLE) { + Some(tail) => format!("{}{}", left, tail), + None => word.to_string() + } + } + } + } else if word_len == 1 { + let (left, right) = split_chars_at(word, word_len-1); + + match lookups_string(&right[..], vowel, HIRAGANA_TO_ROW_TABLE) { + Some(tail) => format!("{}{}", left, tail), + None => word.to_string() + } + } else { + word.to_string() + } +} + +pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> String { + use self::VerbConjugation::*; + match conjugation { + // NonPast => verb.to_string(), + NonPast => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "ん"), + NonPastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "ん"), + PastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "んたん"), + Past => format!("{}{}", replace_last_with_vowel(&euphonic_stem(verb, vt), "あ"), "ん"), + NonPastPolite => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "あ"), "びーん"), + + _ => verb.to_string() + } +} diff --git a/src/okinawan/tables.rs b/src/okinawan/tables.rs index 89960ba..6290033 100644 --- a/src/okinawan/tables.rs +++ b/src/okinawan/tables.rs @@ -1,12 +1,13 @@ + // okinawan hepburn romaji pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ - ("'nn","っん"),("'wa","っわ"),("'we","っゑ"),("'wi","っゐ"),("'wo","っを"),("'ya","っや"),("'yo","っよ"),("'yu","っゆ"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","でぃ"),("do","ど"),("du","どぅ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyi","ぢぃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gwa","ぐゎ"),("gwe","ぐぇ"),("gwi","ぐぃ"),("gwo","ぐぉ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kwa","くゎ"),("kwe","くぇ"),("kwi","くぃ"),("kwo","くぉ"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("n'","ん"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("qnn","っん"),("qwa","っわ"),("qwe","っゑ"),("qwi","っゐ"),("qwo","っを"),("qya","っや"),("qyo","っよ"),("qyu","っゆ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","すぃ"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","てぃ"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tsi","つぃ"),("tso","つぉ"),("tsu","つ"),("tu","とぅ"),("u","う"),("wa","わ"),("we","ゑ"),("wi","ゐ"),("wo","を"),("wu","をぉ"),("xtsu","っ"),("xtu","っ"),("ya","や"),("ye","えぇ"),("yi","いぃ"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zi","ずぃ"),("zo","ぞ"),("zu","ず") + ("'nn","っん"),("'wa","っわ"),("'we","っゑ"),("'wi","っゐ"),("'wo","っを"),("'ya","っや"),("'yo","っよ"),("'yu","っゆ"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょ"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","でぃ"),("do","ど"),("du","どぅ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyi","ぢぃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gwa","ぐゎ"),("gwe","ぐぇ"),("gwi","ぐぃ"),("gwo","ぐぉ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kwa","くゎ"),("kwe","くぇ"),("kwi","くぃ"),("kwo","くぉ"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("n'","ん"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょ"),("pyu","ぴゅ"),("qnn","っん"),("qwa","っわ"),("qwe","っゑ"),("qwi","っゐ"),("qwo","っを"),("qya","っや"),("qyo","っよ"),("qyu","っゆ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","すぃ"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","てぃ"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tsi","つぃ"),("tso","つぉ"),("tsu","つ"),("tu","とぅ"),("u","う"),("wa","わ"),("we","ゑ"),("wi","ゐ"),("wo","を"),("wu","をぉ"),("xtsu","っ"),("xtu","っ"),("ya","や"),("ye","えぇ"),("yi","いぃ"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zi","ずぃ"),("zo","ぞ"),("zu","ず") ]; // okinawan hiragana pub const HIRAGANA_TO_ROMAJI_TABLE: &'static [(&str, &str)] = &[ - ("あ","a"),("い","i"),("いぃ","yi"),("う","u"),("え","e"),("えぇ","ye"),("お","o"),("か","ka"),("が","ga"),("き","ki"),("きゃ","kya"),("きゅ","kyu"),("きょ","kyo"),("ぎ","gi"),("ぎゃ","gya"),("ぎゅ","gyu"),("ぎょ","gyo"),("く","ku"),("くぃ","kwi"),("くぇ","kwe"),("くぉ","kwo"),("くゎ","kwa"),("ぐ","gu"),("ぐぃ","gwi"),("ぐぇ","gwe"),("ぐぉ","gwo"),("ぐゎ","gwa"),("け","ke"),("げ","ge"),("こ","ko"),("ご","go"),("さ","sa"),("ざ","za"),("し","shi"),("しぇ","she"),("しゃ","sha"),("しゅ","shu"),("しょ","sho"),("じ","ji"),("じぇ","je"),("じゃ","ja"),("じゅ","ju"),("じょ","jo"),("す","su"),("すぃ","si"),("ず","zu"),("ずぃ","zi"),("せ","se"),("ぜ","ze"),("そ","so"),("ぞ","zo"),("た","ta"),("だ","da"),("ち","chi"),("ちぇ","che"),("ちゃ","cha"),("ちゅ","chu"),("ちょ","cho"),("ぢぃ","dyi"),("ぢぇ","dye"),("ぢゃ","dya"),("ぢゅ","dyu"),("ぢょ","dyo"),("っや","'ya"),("っゆ","'yu"),("っよ","'yo"),("っわ","'wa"),("っゐ","'wi"),("っゑ","'we"),("っを","'wo"),("っん","'n"),("つ","tsu"),("つぁ","tsa"),("つぃ","tsi"),("つぇ","tse"),("つぉ","tso"),("て","te"),("てぃ","ti"),("で","de"),("でぃ","di"),("と","to"),("とぅ","tu"),("ど","do"),("どぅ","du"),("な","na"),("に","ni"),("にゃ","nya"),("にゅ","nyu"),("にょ","nyo"),("ぬ","nu"),("ね","ne"),("の","no"),("は","ha"),("ば","ba"),("ぱ","pa"),("ひ","hi"),("ひゃ","hya"),("ひゅ","hyu"),("ひょ","hyo"),("び","bi"),("びゃ","bya"),("びゅ","byu"),("びょう","byo"),("ぴ","pi"),("ぴゃ","pya"),("ぴゅ","pyu"),("ぴょお","pyo"),("ふ","fu"),("ふ","fu"),("ふぁ","fa"),("ふぃ","fi"),("ふぇ","fe"),("ふぉ","fo"),("ぶ","bu"),("ぷ","pu"),("へ","he"),("べ","be"),("ぺ","pe"),("ほ","ho"),("ぼ","bo"),("ぽ","po"),("ま","ma"),("み","mi"),("みゃ","mya"),("みゅ","myu"),("みょ","myo"),("む","mu"),("め","me"),("も","mo"),("や","ya"),("ゆ","yu"),("よ","yo"),("ら","ra"),("り","ri"),("りゃ","rya"),("りゅ","ryu"),("りょ","ryo"),("る","ru"),("れ","re"),("ろ","ro"),("わ","wa"),("ゐ","wi"),("ゑ","we"),("を","wo"),("をぉ","wu"),("ん","n") + ("あ","a"),("い","i"),("いぃ","yi"),("う","u"),("え","e"),("えぇ","ye"),("お","o"),("か","ka"),("が","ga"),("き","ki"),("きゃ","kya"),("きゅ","kyu"),("きょ","kyo"),("ぎ","gi"),("ぎゃ","gya"),("ぎゅ","gyu"),("ぎょ","gyo"),("く","ku"),("くぃ","kwi"),("くぇ","kwe"),("くぉ","kwo"),("くゎ","kwa"),("ぐ","gu"),("ぐぃ","gwi"),("ぐぇ","gwe"),("ぐぉ","gwo"),("ぐゎ","gwa"),("け","ke"),("げ","ge"),("こ","ko"),("ご","go"),("さ","sa"),("ざ","za"),("し","shi"),("しぇ","she"),("しゃ","sha"),("しゅ","shu"),("しょ","sho"),("じ","ji"),("じぇ","je"),("じゃ","ja"),("じゅ","ju"),("じょ","jo"),("す","su"),("すぃ","si"),("ず","zu"),("ずぃ","zi"),("せ","se"),("ぜ","ze"),("そ","so"),("ぞ","zo"),("た","ta"),("だ","da"),("ち","chi"),("ちぇ","che"),("ちゃ","cha"),("ちゅ","chu"),("ちょ","cho"),("ぢぃ","dyi"),("ぢぇ","dye"),("ぢゃ","dya"),("ぢゅ","dyu"),("ぢょ","dyo"),("っや","'ya"),("っゆ","'yu"),("っよ","'yo"),("っわ","'wa"),("っゐ","'wi"),("っゑ","'we"),("っを","'wo"),("っん","'n"),("つ","tsu"),("つぁ","tsa"),("つぃ","tsi"),("つぇ","tse"),("つぉ","tso"),("て","te"),("てぃ","ti"),("で","de"),("でぃ","di"),("と","to"),("とぅ","tu"),("ど","do"),("どぅ","du"),("な","na"),("に","ni"),("にゃ","nya"),("にゅ","nyu"),("にょ","nyo"),("ぬ","nu"),("ね","ne"),("の","no"),("は","ha"),("ば","ba"),("ぱ","pa"),("ひ","hi"),("ひゃ","hya"),("ひゅ","hyu"),("ひょ","hyo"),("び","bi"),("びゃ","bya"),("びゅ","byu"),("びょ","byo"),("ぴ","pi"),("ぴゃ","pya"),("ぴゅ","pyu"),("ぴょ","pyo"),("ふ","fu"),("ふ","fu"),("ふぁ","fa"),("ふぃ","fi"),("ふぇ","fe"),("ふぉ","fo"),("ぶ","bu"),("ぷ","pu"),("へ","he"),("べ","be"),("ぺ","pe"),("ほ","ho"),("ぼ","bo"),("ぽ","po"),("ま","ma"),("み","mi"),("みゃ","mya"),("みゅ","myu"),("みょ","myo"),("む","mu"),("め","me"),("も","mo"),("や","ya"),("ゆ","yu"),("よ","yo"),("ら","ra"),("り","ri"),("りゃ","rya"),("りゅ","ryu"),("りょ","ryo"),("る","ru"),("れ","re"),("ろ","ro"),("わ","wa"),("ゐ","wi"),("ゑ","we"),("を","wo"),("をぉ","wu"),("ん","n") ]; // hiragana rows @@ -146,7 +147,7 @@ pub const RYA_ROW: &'static [(&str, &str)] = &[ ]; pub const HIRAGANA_TO_ROW_TABLE: &'static [(&str, &'static [(&str, &str)])] = &[ - ("い",A_ROW),("いぃ",YA_ROW),("う",A_ROW),("え",A_ROW),("えぇ",YA_ROW),("お",A_ROW),("か",KA_ROW),("が",GA_ROW),("き",KA_ROW),("きゃ",KYA_ROW),("きゅ",KYA_ROW),("きょ",KYA_ROW),("ぎ",GA_ROW),("ぎゃ",GYA_ROW),("ぎゅ",GYA_ROW),("ぎょ",GYA_ROW),("く",KA_ROW),("くぁ",KWA_ROW),("くぃ",KWA_ROW),("くぇ",KWA_ROW),("くぉ",KWA_ROW),("ぐ",GA_ROW),("ぐぁ",GWA_ROW),("ぐぃ",GWA_ROW),("ぐぇ",GWA_ROW),("ぐぉ",GWA_ROW),("け",KA_ROW),("げ",GA_ROW),("こ",KA_ROW),("ご",KA_ROW),("さ",SA_ROW),("ざ",ZA_ROW),("し",SHA_ROW),("しぇ",SHA_ROW),("しゃ",SHA_ROW),("しゅ",SHA_ROW),("しょ",SHA_ROW),("じ",JA_ROW),("じぇ",JA_ROW),("じゃ",JA_ROW),("じゅ",JA_ROW),("じょ",JA_ROW),("す",SA_ROW),("すぃ",SA_ROW),("ず",ZA_ROW),("ずぃ",ZA_ROW),("せ",SA_ROW),("ぜ",ZA_ROW),("そ",SA_ROW),("ぞ",ZA_ROW),("た",TA_ROW),("だ",DA_ROW),("ち",CHA_ROW),("ちぇ",CHA_ROW),("ちゃ",CHA_ROW),("ちゅ",CHA_ROW),("ちょ",CHA_ROW),("っあ",QA_ROW),("っい",QA_ROW),("っう",QA_ROW),("っえ",QA_ROW),("っお",QA_ROW),("っや",QYA_ROW),("っゆ",QYA_ROW),("っよ",QYA_ROW),("っわ",QWA_ROW),("っゐ",QWA_ROW),("っゑ",QWA_ROW),("っを",QWA_ROW),("つ",TSA_ROW),("つぁ",TSA_ROW),("つぃ",TSA_ROW),("つぇ",TSA_ROW),("つぉ",TSA_ROW),("て",TA_ROW),("てぃ",TA_ROW),("で",DA_ROW),("でぃ",DA_ROW),("と",TA_ROW),("とぅ",TA_ROW),("ど",DA_ROW),("どぅ",DA_ROW),("な",NA_ROW),("に",NA_ROW),("にゃ",NA_ROW),("にゅ",NA_ROW),("にょ",NA_ROW),("ぬ",NA_ROW),("ね",NA_ROW),("の",NA_ROW),("は",HA_ROW),("ば",BA_ROW),("ぱ",PA_ROW),("ひ",HA_ROW),("ひゃ",HYA_ROW),("ひゅ",HYA_ROW),("ひょ",HYA_ROW),("び",BA_ROW),("ぴ",PA_ROW),("ふ",FA_ROW),("ふぁ",FA_ROW),("ふぃ",FA_ROW),("ふぇ",FA_ROW),("ふぉ",FA_ROW),("ぶ",BA_ROW),("ぷ",PA_ROW),("へ",HA_ROW),("べ",BA_ROW),("ぺ",PA_ROW),("ほ",HA_ROW),("ぼ",BA_ROW),("ぽ",PA_ROW),("ま",MA_ROW),("み",MA_ROW),("みゃ",MYA_ROW),("みゅ",MYA_ROW),("みょ",MYA_ROW),("む",MA_ROW),("め",MA_ROW),("も",MA_ROW),("や",YA_ROW),("ゆ",YA_ROW),("よ",YA_ROW),("ら",RA_ROW),("り",RA_ROW),("りゃ",RYA_ROW),("りゅ",RYA_ROW),("りょ",RYA_ROW),("る",RA_ROW),("れ",RA_ROW),("ろ",RA_ROW),("わ",WA_ROW),("ゐ",WA_ROW),("ゑ",WA_ROW),("を",WA_ROW),("をぅ",WA_ROW) + ("い",A_ROW),("いぃ",YA_ROW),("う",A_ROW),("え",A_ROW),("えぇ",YA_ROW),("お",A_ROW),("か",KA_ROW),("が",GA_ROW),("き",KA_ROW),("きゃ",KYA_ROW),("きゅ",KYA_ROW),("きょ",KYA_ROW),("ぎ",GA_ROW),("ぎゃ",GYA_ROW),("ぎゅ",GYA_ROW),("ぎょ",GYA_ROW),("く",KA_ROW),("くぁ",KWA_ROW),("くぃ",KWA_ROW),("くぇ",KWA_ROW),("くぉ",KWA_ROW),("ぐ",GA_ROW),("ぐぁ",GWA_ROW),("ぐぃ",GWA_ROW),("ぐぇ",GWA_ROW),("ぐぉ",GWA_ROW),("け",KA_ROW),("げ",GA_ROW),("こ",KA_ROW),("ご",KA_ROW),("さ",SA_ROW),("ざ",ZA_ROW),("し",SHA_ROW),("しぇ",SHA_ROW),("しゃ",SHA_ROW),("しゅ",SHA_ROW),("しょ",SHA_ROW),("じ",JA_ROW),("じぇ",JA_ROW),("じゃ",JA_ROW),("じゅ",JA_ROW),("じょ",JA_ROW),("す",SA_ROW),("すぃ",SA_ROW),("ず",ZA_ROW),("ずぃ",ZA_ROW),("せ",SA_ROW),("ぜ",ZA_ROW),("そ",SA_ROW),("ぞ",ZA_ROW),("た",TA_ROW),("だ",DA_ROW),("ち",CHA_ROW),("ちぇ",CHA_ROW),("ちゃ",CHA_ROW),("ちゅ",CHA_ROW),("ちょ",CHA_ROW),("っあ",QA_ROW),("っい",QA_ROW),("っう",QA_ROW),("っえ",QA_ROW),("っお",QA_ROW),("っや",QYA_ROW),("っゆ",QYA_ROW),("っよ",QYA_ROW),("っわ",QWA_ROW),("っゐ",QWA_ROW),("っゑ",QWA_ROW),("っを",QWA_ROW),("つ",TSA_ROW),("つぁ",TSA_ROW),("つぃ",TSA_ROW),("つぇ",TSA_ROW),("つぉ",TSA_ROW),("て",TA_ROW),("てぃ",TA_ROW),("で",DA_ROW),("でぃ",DA_ROW),("と",TA_ROW),("とぅ",TA_ROW),("ど",DA_ROW),("どぅ",DA_ROW),("な",NA_ROW),("に",NA_ROW),("にゃ",NYA_ROW),("にゅ",NYA_ROW),("にょ",NYA_ROW),("ぬ",NA_ROW),("ね",NA_ROW),("の",NA_ROW),("は",HA_ROW),("ば",BA_ROW),("ぱ",PA_ROW),("ひ",HA_ROW),("ひゃ",HYA_ROW),("ひゅ",HYA_ROW),("ひょ",HYA_ROW),("び",BA_ROW),("ぴ",PA_ROW),("ふ",FA_ROW),("ふぁ",FA_ROW),("ふぃ",FA_ROW),("ふぇ",FA_ROW),("ふぉ",FA_ROW),("ぶ",BA_ROW),("ぷ",PA_ROW),("へ",HA_ROW),("べ",BA_ROW),("ぺ",PA_ROW),("ほ",HA_ROW),("ぼ",BA_ROW),("ぽ",PA_ROW),("ま",MA_ROW),("み",MA_ROW),("みゃ",MYA_ROW),("みゅ",MYA_ROW),("みょ",MYA_ROW),("む",MA_ROW),("め",MA_ROW),("も",MA_ROW),("や",YA_ROW),("ゆ",YA_ROW),("よ",YA_ROW),("ら",RA_ROW),("り",RA_ROW),("りゃ",RYA_ROW),("りゅ",RYA_ROW),("りょ",RYA_ROW),("る",RA_ROW),("れ",RA_ROW),("ろ",RA_ROW),("わ",WA_ROW),("ゐ",WA_ROW),("ゑ",WA_ROW),("を",WA_ROW),("をぅ",WA_ROW) ]; // 長音符 ー ちょうおんぷ @@ -191,4 +192,6 @@ pub const HIRAGANA_TO_ROW_TABLE: &'static [(&str, &'static [(&str, &str)])] = &[ "あ","い","いぃ","う","え","えぇ","お","か","が","き","きゃ","きゅ","きょ","ぎ","ぎゃ","ぎゅ","ぎょ","く","くぁ","くぃ","くぇ","くぉ","ぐ","ぐぁ","ぐぃ","ぐぇ","ぐぉ","け","げ","こ","ご","さ","ざ","し","しぇ","しゃ","しゅ","しょ","じ","じぇ","じゃ","じゅ","じょ","す","すぃ","ず","ずぃ","せ","ぜ","そ","ぞ","た","だ","ち","ちぇ","ちゃ","ちゅ","ちょ","っあ","っい","っう","っえ","っお","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","っん","つ","つぁ","つぃ","つぇ","つぉ","て","てぃ","で","でぃ","と","とぅ","ど","どぅ","な","に","にゃ","にゅ","にょ","ぬ","ね","の","は","ば","ぱ","ひ","ひゃ","ひゅ","ひょ","び","ぴ","ふ","ふぁ","ふぃ","ふぇ","ふぉ","ぶ","ぷ","へ","べ","ぺ","ほ","ぼ","ぽ","ま","み","みゃ","みゅ","みょ","む","め","も","や","ゆ","よ","ら","り","りゃ","りゅ","りょ","る","れ","ろ","わ","ゐ","ゑ","を","を","ん" + +"にゃ" "にゅ" "にょ" */ diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs index 787c985..9b530d2 100644 --- a/src/okinawan/tests.rs +++ b/src/okinawan/tests.rs @@ -1,4 +1,5 @@ -use okinawan::util::*; +use okinawan::util::{two_char_is_single_mora, remove_last_mora}; +use okinawan::lib::*; #[cfg(test)] mod tests { @@ -17,4 +18,61 @@ mod tests { assert_eq!(replace_last_with_vowel("かちゃ","え"), "かちぇ".to_string()); assert_eq!(replace_last_with_vowel("かちゃ","お"), "かちょ".to_string()); } + + #[test] + fn test_two_char_is_single_mora() { + assert!(two_char_is_single_mora("っん")); + assert!(two_char_is_single_mora("っゑ")); + assert!(two_char_is_single_mora("っわ")); + assert!(two_char_is_single_mora("びょ")); + assert!(two_char_is_single_mora("ちゅ")); + + assert!(!two_char_is_single_mora("ん")); + assert!(!two_char_is_single_mora("わ")); + assert!(!two_char_is_single_mora("び")); + } + + #[test] + fn test_remove_last_mora() { + assert_eq!(remove_last_mora("かちゃ"), "か"); + assert_eq!(remove_last_mora("かち"), "か"); + assert_eq!(remove_last_mora("かちゅ"), "か"); + assert_eq!(remove_last_mora("かちぇ"), "か"); + assert_eq!(remove_last_mora("かちょ"), "か"); + + assert_eq!(remove_last_mora("まま"), "ま"); + assert_eq!(remove_last_mora("まみ"), "ま"); + assert_eq!(remove_last_mora("まむ"), "ま"); + assert_eq!(remove_last_mora("まめ"), "ま"); + assert_eq!(remove_last_mora("まも"), "ま"); + } + + #[test] + fn test__stems() { + assert_eq!(base_stem("かちゅん",VerbType::I1), "かか".to_string()); + assert_eq!(connective_stem("かちゅん",VerbType::I1), "かちゃ".to_string()); + assert_eq!(derivative_stem("かちゅん",VerbType::I1), "かちゃ".to_string()); + assert_eq!(euphonic_stem("かちゅん",VerbType::I1), "かちゃ".to_string()); + } + + #[test] + fn test_conjugate_verb() { + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::NonPast ), "かちゅん".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::NonPastNegative ), "かかん".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::PastNegative ), "かかんたん".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Past ), "かちゃん".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::NonPastPolite ), "かちゃびーん".to_string()); + + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPast ), "むちゅん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPastNegative ), "むたん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::PastNegative ), "むたんたん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Past ), "むっちゃん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPastPolite ), "むちゃびーん".to_string()); + + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPast ), "ゆむん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPastNegative ), "ゆまん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::PastNegative ), "ゆまんたん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Past ), "ゆらん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPastPolite ), "ゆまびーん".to_string()); + } } diff --git a/src/okinawan/util.rs b/src/okinawan/util.rs index 42c0927..b0af15c 100644 --- a/src/okinawan/util.rs +++ b/src/okinawan/util.rs @@ -217,3 +217,31 @@ pub fn replace_last_with_vowel(word: &str, vowel: &str) -> String { word.to_string() } } + + +pub fn remove_last_mora(word: &str) -> String { + let word_len = word.chars().count(); + if word_len > 1 { + let (left, right) = util::chars_split(word, word_len-2); + if two_char_is_single_mora(&right) { + format!("{}", left) + } else { + let (left, _) = util::chars_split(word, word_len-1); + format!("{}", left) + } + } else if word_len == 1 { + let (left, _) = util::chars_split(word, word_len-1); + format!("{}", left); + word.to_string() + } else { + word.to_string() + } +} + +pub fn two_char_is_single_mora(s: &str) -> bool { + s.chars().count() == 2 && ["いぃ","えぇ","きゃ","きゅ","きょ","ぎゃ","ぎゅ","ぎょ","くぃ","くぇ","くぉ","くゎ","ぐぃ","ぐぇ","ぐぉ","ぐゎ","しぇ","しゃ","しゅ","しょ","じぇ","じゃ","じゅ","じょ","すぃ","ずぃ","ちぇ","ちゃ","ちゅ","ちょ","ぢぃ","ぢぇ","ぢゃ","ぢゅ","ぢょ","っ","っや","っゆ","っよ","っわ","っゐ","っゑ","っを","っん","つ","つぁ","つぃ","つぇ","つぉ","てぃ","でぃ","とぅ","どぅ","にゃ","にゅ","にょ","ひゃ","ひゅ","ひょ","びゃ","びゅ","びょ","ぴゃ","ぴゅ","ぴょ","ふぁ","ふぃ","ふぇ","ふぉ","みゃ","みゅ","みょ","りゃ","りゅ","りょ","をぉ"].contains(&s) +} + +/* + +*/ diff --git a/src/util.rs b/src/util.rs index 6b22b0e..b0e1115 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,5 +1,24 @@ use std::cmp::Ordering::{Equal,Greater,Less}; +pub fn truncate_chars(s: &str, max_chars: usize) -> &str { + match s.char_indices().nth(max_chars) { + None => s, + Some((idx, _)) => &s[..idx], + } +} + +pub fn split_chars_at(s: &str, split: usize) -> (String,String) { + let length = s.chars().count(); + + let l_sub_vec = &s.chars().collect::>()[0 .. split]; + let l: String = l_sub_vec.into_iter().collect(); + + let r_sub_vec = &s.chars().collect::>()[split .. s.chars().count()]; + let r: String = r_sub_vec.into_iter().collect(); + (l,r) +} + + // the table provided must be sorted or it may not return the correct value pub fn lookup(s: &str, table: &'static [(&str,&str)]) -> Option { match table.binary_search_by(|&(key, _)| { From 1b92ba93bdeb61143d0d278681e958f7daf3fde2 Mon Sep 17 00:00:00 2001 From: James Haver Date: Tue, 27 Mar 2018 05:51:35 +0800 Subject: [PATCH 04/16] Add more conjugations --- src/okinawan/lib.rs | 37 +++++++++++++++++++++++++++++++++++-- src/okinawan/tables.rs | 2 +- src/okinawan/tests.rs | 37 ++++++++++++++++++++++++++++++++++++- src/util.rs | 11 +++++++++++ 4 files changed, 83 insertions(+), 4 deletions(-) diff --git a/src/okinawan/lib.rs b/src/okinawan/lib.rs index 85c3fa4..d79f753 100644 --- a/src/okinawan/lib.rs +++ b/src/okinawan/lib.rs @@ -208,14 +208,47 @@ pub fn replace_last_with_vowel(word: &str, vowel: &str) -> String { pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> String { use self::VerbConjugation::*; + use self::VerbType::*; match conjugation { // NonPast => verb.to_string(), - NonPast => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "ん"), + NonPast => + match vt { + I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | I9 | I10 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "ん"), + II1 | II2 | II3 | II4 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "い"), "ん"), + _ => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "ん"), + }, NonPastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "ん"), PastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "んたん"), Past => format!("{}{}", replace_last_with_vowel(&euphonic_stem(verb, vt), "あ"), "ん"), NonPastPolite => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "あ"), "びーん"), - + + YesNoInterrogative => + match vt { + II1 | II2 | II3 | II4 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "い"), "み"), + _ => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "み"), + }, + WhInterrogative => + match vt { + II1 | II2 | II3 | II4 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "い"), "が"), + _ => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "が"), + }, + Honorific => + match vt { + II1 | II2 | II3 | II4 => format!("{}{}", connective_stem(verb, vt), "みせーん"), + _ => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "みせーん"), + }, + Potential => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "ゆーすん"), + Desiderative => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "ぶさん"), + Imperative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "え"), "ー"), + + Prohibitive => + match vt { + II1 | II2 | II3 | II4 => format!("{}{}", base_stem(verb, vt), "んな"), + _ => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "う"), "な"), + }, + + Gerund => replace_last_with_vowel(&euphonic_stem(verb, vt), "い"), + _ => verb.to_string() } } diff --git a/src/okinawan/tables.rs b/src/okinawan/tables.rs index 6290033..cc21f47 100644 --- a/src/okinawan/tables.rs +++ b/src/okinawan/tables.rs @@ -147,7 +147,7 @@ pub const RYA_ROW: &'static [(&str, &str)] = &[ ]; pub const HIRAGANA_TO_ROW_TABLE: &'static [(&str, &'static [(&str, &str)])] = &[ - ("い",A_ROW),("いぃ",YA_ROW),("う",A_ROW),("え",A_ROW),("えぇ",YA_ROW),("お",A_ROW),("か",KA_ROW),("が",GA_ROW),("き",KA_ROW),("きゃ",KYA_ROW),("きゅ",KYA_ROW),("きょ",KYA_ROW),("ぎ",GA_ROW),("ぎゃ",GYA_ROW),("ぎゅ",GYA_ROW),("ぎょ",GYA_ROW),("く",KA_ROW),("くぁ",KWA_ROW),("くぃ",KWA_ROW),("くぇ",KWA_ROW),("くぉ",KWA_ROW),("ぐ",GA_ROW),("ぐぁ",GWA_ROW),("ぐぃ",GWA_ROW),("ぐぇ",GWA_ROW),("ぐぉ",GWA_ROW),("け",KA_ROW),("げ",GA_ROW),("こ",KA_ROW),("ご",KA_ROW),("さ",SA_ROW),("ざ",ZA_ROW),("し",SHA_ROW),("しぇ",SHA_ROW),("しゃ",SHA_ROW),("しゅ",SHA_ROW),("しょ",SHA_ROW),("じ",JA_ROW),("じぇ",JA_ROW),("じゃ",JA_ROW),("じゅ",JA_ROW),("じょ",JA_ROW),("す",SA_ROW),("すぃ",SA_ROW),("ず",ZA_ROW),("ずぃ",ZA_ROW),("せ",SA_ROW),("ぜ",ZA_ROW),("そ",SA_ROW),("ぞ",ZA_ROW),("た",TA_ROW),("だ",DA_ROW),("ち",CHA_ROW),("ちぇ",CHA_ROW),("ちゃ",CHA_ROW),("ちゅ",CHA_ROW),("ちょ",CHA_ROW),("っあ",QA_ROW),("っい",QA_ROW),("っう",QA_ROW),("っえ",QA_ROW),("っお",QA_ROW),("っや",QYA_ROW),("っゆ",QYA_ROW),("っよ",QYA_ROW),("っわ",QWA_ROW),("っゐ",QWA_ROW),("っゑ",QWA_ROW),("っを",QWA_ROW),("つ",TSA_ROW),("つぁ",TSA_ROW),("つぃ",TSA_ROW),("つぇ",TSA_ROW),("つぉ",TSA_ROW),("て",TA_ROW),("てぃ",TA_ROW),("で",DA_ROW),("でぃ",DA_ROW),("と",TA_ROW),("とぅ",TA_ROW),("ど",DA_ROW),("どぅ",DA_ROW),("な",NA_ROW),("に",NA_ROW),("にゃ",NYA_ROW),("にゅ",NYA_ROW),("にょ",NYA_ROW),("ぬ",NA_ROW),("ね",NA_ROW),("の",NA_ROW),("は",HA_ROW),("ば",BA_ROW),("ぱ",PA_ROW),("ひ",HA_ROW),("ひゃ",HYA_ROW),("ひゅ",HYA_ROW),("ひょ",HYA_ROW),("び",BA_ROW),("ぴ",PA_ROW),("ふ",FA_ROW),("ふぁ",FA_ROW),("ふぃ",FA_ROW),("ふぇ",FA_ROW),("ふぉ",FA_ROW),("ぶ",BA_ROW),("ぷ",PA_ROW),("へ",HA_ROW),("べ",BA_ROW),("ぺ",PA_ROW),("ほ",HA_ROW),("ぼ",BA_ROW),("ぽ",PA_ROW),("ま",MA_ROW),("み",MA_ROW),("みゃ",MYA_ROW),("みゅ",MYA_ROW),("みょ",MYA_ROW),("む",MA_ROW),("め",MA_ROW),("も",MA_ROW),("や",YA_ROW),("ゆ",YA_ROW),("よ",YA_ROW),("ら",RA_ROW),("り",RA_ROW),("りゃ",RYA_ROW),("りゅ",RYA_ROW),("りょ",RYA_ROW),("る",RA_ROW),("れ",RA_ROW),("ろ",RA_ROW),("わ",WA_ROW),("ゐ",WA_ROW),("ゑ",WA_ROW),("を",WA_ROW),("をぅ",WA_ROW) + ("あ",A_ROW),("い",A_ROW),("いぃ",YA_ROW),("う",A_ROW),("え",A_ROW),("えぇ",YA_ROW),("お",A_ROW),("か",KA_ROW),("が",GA_ROW),("き",KA_ROW),("きゃ",KYA_ROW),("きゅ",KYA_ROW),("きょ",KYA_ROW),("ぎ",GA_ROW),("ぎゃ",GYA_ROW),("ぎゅ",GYA_ROW),("ぎょ",GYA_ROW),("く",KA_ROW),("くぁ",KWA_ROW),("くぃ",KWA_ROW),("くぇ",KWA_ROW),("くぉ",KWA_ROW),("ぐ",GA_ROW),("ぐぁ",GWA_ROW),("ぐぃ",GWA_ROW),("ぐぇ",GWA_ROW),("ぐぉ",GWA_ROW),("け",KA_ROW),("げ",GA_ROW),("こ",KA_ROW),("ご",KA_ROW),("さ",SA_ROW),("ざ",ZA_ROW),("し",SHA_ROW),("しぇ",SHA_ROW),("しゃ",SHA_ROW),("しゅ",SHA_ROW),("しょ",SHA_ROW),("じ",JA_ROW),("じぇ",JA_ROW),("じゃ",JA_ROW),("じゅ",JA_ROW),("じょ",JA_ROW),("す",SA_ROW),("すぃ",SA_ROW),("ず",ZA_ROW),("ずぃ",ZA_ROW),("せ",SA_ROW),("ぜ",ZA_ROW),("そ",SA_ROW),("ぞ",ZA_ROW),("た",TA_ROW),("だ",DA_ROW),("ち",CHA_ROW),("ちぇ",CHA_ROW),("ちゃ",CHA_ROW),("ちゅ",CHA_ROW),("ちょ",CHA_ROW),("っあ",QA_ROW),("っい",QA_ROW),("っう",QA_ROW),("っえ",QA_ROW),("っお",QA_ROW),("っや",QYA_ROW),("っゆ",QYA_ROW),("っよ",QYA_ROW),("っわ",QWA_ROW),("っゐ",QWA_ROW),("っゑ",QWA_ROW),("っを",QWA_ROW),("つ",TSA_ROW),("つぁ",TSA_ROW),("つぃ",TSA_ROW),("つぇ",TSA_ROW),("つぉ",TSA_ROW),("て",TA_ROW),("てぃ",TA_ROW),("で",DA_ROW),("でぃ",DA_ROW),("と",TA_ROW),("とぅ",TA_ROW),("ど",DA_ROW),("どぅ",DA_ROW),("な",NA_ROW),("に",NA_ROW),("にゃ",NYA_ROW),("にゅ",NYA_ROW),("にょ",NYA_ROW),("ぬ",NA_ROW),("ね",NA_ROW),("の",NA_ROW),("は",HA_ROW),("ば",BA_ROW),("ぱ",PA_ROW),("ひ",HA_ROW),("ひゃ",HYA_ROW),("ひゅ",HYA_ROW),("ひょ",HYA_ROW),("び",BA_ROW),("ぴ",PA_ROW),("ふ",FA_ROW),("ふぁ",FA_ROW),("ふぃ",FA_ROW),("ふぇ",FA_ROW),("ふぉ",FA_ROW),("ぶ",BA_ROW),("ぷ",PA_ROW),("へ",HA_ROW),("べ",BA_ROW),("ぺ",PA_ROW),("ほ",HA_ROW),("ぼ",BA_ROW),("ぽ",PA_ROW),("ま",MA_ROW),("み",MA_ROW),("みゃ",MYA_ROW),("みゅ",MYA_ROW),("みょ",MYA_ROW),("む",MA_ROW),("め",MA_ROW),("も",MA_ROW),("や",YA_ROW),("ゆ",YA_ROW),("よ",YA_ROW),("ら",RA_ROW),("り",RA_ROW),("りゃ",RYA_ROW),("りゅ",RYA_ROW),("りょ",RYA_ROW),("る",RA_ROW),("れ",RA_ROW),("ろ",RA_ROW),("わ",WA_ROW),("ゐ",WA_ROW),("ゑ",WA_ROW),("を",WA_ROW),("をぅ",WA_ROW) ]; // 長音符 ー ちょうおんぷ diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs index 9b530d2..fa8d422 100644 --- a/src/okinawan/tests.rs +++ b/src/okinawan/tests.rs @@ -17,6 +17,18 @@ mod tests { assert_eq!(replace_last_with_vowel("かちゃ","う"), "かちゅ".to_string()); assert_eq!(replace_last_with_vowel("かちゃ","え"), "かちぇ".to_string()); assert_eq!(replace_last_with_vowel("かちゃ","お"), "かちょ".to_string()); + + assert_eq!(replace_last_with_vowel("ああ","あ"), "ああ".to_string()); + assert_eq!(replace_last_with_vowel("ああ","い"), "あい".to_string()); + assert_eq!(replace_last_with_vowel("ああ","う"), "あう".to_string()); + assert_eq!(replace_last_with_vowel("ああ","え"), "あえ".to_string()); + assert_eq!(replace_last_with_vowel("ああ","お"), "あお".to_string()); + + assert_eq!(replace_last_with_vowel("あ","あ"), "あ".to_string()); + assert_eq!(replace_last_with_vowel("あ","い"), "い".to_string()); + assert_eq!(replace_last_with_vowel("あ","う"), "う".to_string()); + assert_eq!(replace_last_with_vowel("あ","え"), "え".to_string()); + assert_eq!(replace_last_with_vowel("あ","お"), "お".to_string()); } #[test] @@ -62,17 +74,40 @@ mod tests { assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::PastNegative ), "かかんたん".to_string()); assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Past ), "かちゃん".to_string()); assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::NonPastPolite ), "かちゃびーん".to_string()); - + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::YesNoInterrogative ), "かちゅみ".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::WhInterrogative ), "かちゅが".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Honorific ), "かちみせーん".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Imperative ), "かけー".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Prohibitive ), "かくな".to_string()); + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Desiderative ), "かちぶさん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPast ), "むちゅん".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPastNegative ), "むたん".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::PastNegative ), "むたんたん".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Past ), "むっちゃん".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPastPolite ), "むちゃびーん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::YesNoInterrogative ), "むちゅみ".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::WhInterrogative ), "むちゅが".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Honorific ), "むちみせーん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Imperative ), "むてー".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Prohibitive ), "むとぅな".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Desiderative ), "むちぶさん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPast ), "ゆむん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPastNegative ), "ゆまん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::PastNegative ), "ゆまんたん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Past ), "ゆらん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPastPolite ), "ゆまびーん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::YesNoInterrogative ), "ゆむみ".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::WhInterrogative ), "ゆむが".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Honorific ), "ゆみみせーん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Imperative ), "ゆめー".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Prohibitive ), "ゆむな".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Desiderative ), "ゆみぶさん".to_string()); + + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPast ), "とぅいん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPastNegative ), "とぅらん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::PastNegative ), "とぅらんたん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Past ), "とぅらん".to_string()); // た } } diff --git a/src/util.rs b/src/util.rs index b0e1115..1615715 100644 --- a/src/util.rs +++ b/src/util.rs @@ -97,3 +97,14 @@ pub fn chars_split(s: &str, split: usize) -> (String,String) { (l,r) } + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_chars_split() { + assert_eq!(chars_split("かた",1), ("か".to_string(), "た".to_string())); + assert_eq!(chars_split("あい",1), ("あ".to_string(), "い".to_string())); + assert_eq!(chars_split("あ",1), ("あ".to_string(), "".to_string())); + } +} From 24d9fa687a6d8a35fd0884660fefeab473f38a05 Mon Sep 17 00:00:00 2001 From: James Haver Date: Tue, 27 Mar 2018 14:27:42 +0800 Subject: [PATCH 05/16] Small fixes for type II verbs --- src/okinawan/lib.rs | 10 +++++++--- src/okinawan/tests.rs | 9 +++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/okinawan/lib.rs b/src/okinawan/lib.rs index d79f753..bd8ff0e 100644 --- a/src/okinawan/lib.rs +++ b/src/okinawan/lib.rs @@ -220,7 +220,11 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> NonPastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "ん"), PastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "んたん"), Past => format!("{}{}", replace_last_with_vowel(&euphonic_stem(verb, vt), "あ"), "ん"), - NonPastPolite => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "あ"), "びーん"), + NonPastPolite => match vt { + II1 | II2 | II3 | II4 => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "びーん"), // やびーん + _ => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "あ"), "びーん"), + + } YesNoInterrogative => match vt { @@ -234,7 +238,7 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> }, Honorific => match vt { - II1 | II2 | II3 | II4 => format!("{}{}", connective_stem(verb, vt), "みせーん"), + // II1 | II2 | II3 | II4 => format!("{}{}", connective_stem(verb, vt), "みせーん"), _ => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "みせーん"), }, Potential => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "ゆーすん"), @@ -243,7 +247,7 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> Prohibitive => match vt { - II1 | II2 | II3 | II4 => format!("{}{}", base_stem(verb, vt), "んな"), + // II1 | II2 | II3 | II4 => format!("{}{}", base_stem(verb, vt), "んな"), // んな _ => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "う"), "な"), }, diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs index fa8d422..0188c13 100644 --- a/src/okinawan/tests.rs +++ b/src/okinawan/tests.rs @@ -109,5 +109,14 @@ mod tests { assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPastNegative ), "とぅらん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::PastNegative ), "とぅらんたん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Past ), "とぅらん".to_string()); // た + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPastPolite ), "とぅいびーん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::YesNoInterrogative ), "とぅいみ".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::WhInterrogative ), "とぅいが".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Honorific ), "とぅいみせーん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Imperative ), "とぅれー".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Prohibitive ), "とぅるな".to_string()); // とぅんな + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Desiderative ), "とぅいぶさん".to_string()); + + } } From fe410d5ccd22fdef92c3d3df39c5d9f344cd40f5 Mon Sep 17 00:00:00 2001 From: James Haver Date: Tue, 27 Mar 2018 17:57:54 +0800 Subject: [PATCH 06/16] More conjugations --- src/okinawan/lib.rs | 18 ++++++++- src/okinawan/tests.rs | 93 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 5 deletions(-) diff --git a/src/okinawan/lib.rs b/src/okinawan/lib.rs index bd8ff0e..6c24e31 100644 --- a/src/okinawan/lib.rs +++ b/src/okinawan/lib.rs @@ -162,7 +162,11 @@ pub fn euphonic_stem(verb: &str, vt: VerbType) -> String { match vt { IV => "".to_string(), III => format!("{}た", bare_stem), - II1 | II2 | II3 | II4 => format!("{}ら", bare_stem), + // II1 | II2 | II3 | II4 => format!("{}ら", bare_stem), + II1 => format!("{}った", bare_stem), + II2 => format!("{}た", bare_stem), + II3 => format!("{}っちゃ", bare_stem), + II4 => format!("{}ちゃ", bare_stem), // った I1 => format!("{}ちゃ", bare_stem), I2 => format!("{}じゃ", bare_stem), I3 => format!("{}っちゃ", bare_stem), @@ -211,6 +215,12 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> use self::VerbType::*; match conjugation { // NonPast => verb.to_string(), + AttributiveNonPast => + match vt { + I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | I9 | I10 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "る"), + II1 | II2 | II3 | II4 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "い"), "る"), + _ => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "る"), + }, NonPast => match vt { I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | I9 | I10 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "ん"), @@ -251,8 +261,12 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> _ => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "う"), "な"), }, - Gerund => replace_last_with_vowel(&euphonic_stem(verb, vt), "い"), + Gerund | Continuative => replace_last_with_vowel(&euphonic_stem(verb, vt), "い"), + Progressive => format!("{}{}", replace_last_with_vowel(&euphonic_stem(verb, vt), "お"), "ーん"), // えーん + Causative => format!("{}{}", &base_stem(verb, vt), "すん"), + Passive => format!("{}{}", &base_stem(verb, vt), "りゆん"), // りーん + _ => verb.to_string() } } diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs index 0188c13..270a799 100644 --- a/src/okinawan/tests.rs +++ b/src/okinawan/tests.rs @@ -92,7 +92,12 @@ mod tests { assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Imperative ), "むてー".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Prohibitive ), "むとぅな".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Desiderative ), "むちぶさん".to_string()); - + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::AttributiveNonPast ), "むちゅる".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Progressive ), "むっちょーん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Continuative ), "むっち".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Causative ), "むたすん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Passive ), "むたりゆん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPast ), "ゆむん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPastNegative ), "ゆまん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::PastNegative ), "ゆまんたん".to_string()); @@ -108,7 +113,7 @@ mod tests { assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPast ), "とぅいん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPastNegative ), "とぅらん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::PastNegative ), "とぅらんたん".to_string()); - assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Past ), "とぅらん".to_string()); // た + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Past ), "とぅたん".to_string()); // ら assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPastPolite ), "とぅいびーん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::YesNoInterrogative ), "とぅいみ".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::WhInterrogative ), "とぅいが".to_string()); @@ -117,6 +122,88 @@ mod tests { assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Prohibitive ), "とぅるな".to_string()); // とぅんな assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Desiderative ), "とぅいぶさん".to_string()); - + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::AttributiveNonPast ), "とぅいる".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Progressive ), "とぅとーん".to_string()); // とぅろーん + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Continuative ), "とぅてぃ".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Causative ), "とぅらすん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Passive ), "とぅらりゆん".to_string()); + } } + +/* +I1 +かちゅん 書ちゅん +ちちゅん 聞ちゅん +さちゅん 咲ちゅん +っあっちゅん 歩ちゅん + +I2 +くうじゅん 漕じゅん +っゐーじゅん 泳じゅん +っおーじゅん 扇じゅん + +I3 +たちゅん 立ちゅん +っうちゅん 打ちゅん +かちゅん 勝ちゅん + +I4 +すらちゅん 育ちゅん +たむちゅん 保ちゅん +くちゅん 朽ちゅん + +I5 +くるすん 殺すん +めーすん 燃すん +はんすん 外すん + +I6 +すん 為ん +しっくぁすん 敷くぁすん +ひっこーすん 比較すん + +I7 +ゆぶん 呼ぶん +とぅぶん 飛ぶん +むすぶん 結ぶん + +I8 +ゆむん 読むん +ぬむん 飲むん +っあむん 編むん + +I9 +にんじゅん 眠じゅん +かんじゅん 被じゅん +っあんじゅん + +I10 +んんじゅん 見じゅん +くんじゅん 括じゅん + +II1 +とぅいん 取いん + +II2 +かいん 刈いん +ぬぶいん 登いん +っあらいん 洗いん + +II3 +きーん 蹴ーん +っいーん 入ーん +ひーん 放ーん +ちーん 切ーん + +II4 +にーん 煮ーん +ちーん 着ーん +っいーん 言ーん +いーん 座ーん + +III +あん 有ん +うん 居ん +やん  +*/ From c1d7381f99e18fdcff3314a26768d2b9cb765712 Mon Sep 17 00:00:00 2001 From: James Haver Date: Thu, 29 Mar 2018 09:10:12 +0800 Subject: [PATCH 07/16] Tests for all I Verbs --- src/okinawan/tests.rs | 123 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 2 deletions(-) diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs index 270a799..b201859 100644 --- a/src/okinawan/tests.rs +++ b/src/okinawan/tests.rs @@ -80,7 +80,19 @@ mod tests { assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Imperative ), "かけー".to_string()); assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Prohibitive ), "かくな".to_string()); assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Desiderative ), "かちぶさん".to_string()); - + + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::NonPast ), "くーじゅん".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::NonPastNegative ), "くーがん".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::PastNegative ), "くーがんたん".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::Past ), "くーじゃん".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::NonPastPolite ), "くーじゃびーん".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::YesNoInterrogative ), "くーじゅみ".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::WhInterrogative ), "くーじゅが".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::Honorific ), "くーじみせーん".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::Imperative ), "くーげー".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::Prohibitive ), "くーぐな".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::Desiderative ), "くーじぶさん".to_string()); + assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPast ), "むちゅん".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::NonPastNegative ), "むたん".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::PastNegative ), "むたんたん".to_string()); @@ -97,6 +109,74 @@ mod tests { assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Continuative ), "むっち".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Causative ), "むたすん".to_string()); assert_eq!(conjugate_verb("むちゅん", VerbType::I3, VerbConjugation::Passive ), "むたりゆん".to_string()); + + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::NonPast ), "すらちゅん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::NonPastNegative ), "すらたん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::PastNegative ), "すらたんたん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Past ), "すらちゃん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::NonPastPolite ), "すらちゃびーん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::YesNoInterrogative ), "すらちゅみ".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::WhInterrogative ), "すらちゅが".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Honorific ), "すらちみせーん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Imperative ), "すらてー".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Prohibitive ), "すらとぅな".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Desiderative ), "すらちぶさん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::AttributiveNonPast ), "すらちゅる".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Progressive ), "すらちょーん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Continuative ), "すらち".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Causative ), "すらたすん".to_string()); + assert_eq!(conjugate_verb("すらちゅん", VerbType::I4, VerbConjugation::Passive ), "すらたりゆん".to_string()); + + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::NonPast ), "くるすん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::NonPastNegative ), "くるさん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::PastNegative ), "くるさんたん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Past ), "くるちゃん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::NonPastPolite ), "くるさびーん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::YesNoInterrogative ), "くるすみ".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::WhInterrogative ), "くるすが".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Honorific ), "くるすぃみせーん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Imperative ), "くるせー".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Prohibitive ), "くるすな".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Desiderative ), "くるすぃぶさん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::AttributiveNonPast ), "くるする".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Progressive ), "くるちょーん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Continuative ), "くるち".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Causative ), "くるさすん".to_string()); + assert_eq!(conjugate_verb("くるすん", VerbType::I5, VerbConjugation::Passive ), "くるさりゆん".to_string()); + + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::NonPast ), "ひっこーすん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::NonPastNegative ), "ひっこーさん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::PastNegative ), "ひっこーさんたん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Past ), "ひっこーさん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::NonPastPolite ), "ひっこーさびーん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::YesNoInterrogative ), "ひっこーすみ".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::WhInterrogative ), "ひっこーすが".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Honorific ), "ひっこーすぃみせーん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Imperative ), "ひっこーせー".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Prohibitive ), "ひっこーすな".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Desiderative ), "ひっこーすぃぶさん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::AttributiveNonPast ), "ひっこーする".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Progressive ), "ひっこーそーん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Continuative ), "ひっこーすぃ".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Causative ), "ひっこーさすん".to_string()); + assert_eq!(conjugate_verb("ひっこーすん", VerbType::I6, VerbConjugation::Passive ), "ひっこーさりゆん".to_string()); + + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::NonPast ), "ゆぶん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::NonPastNegative ), "ゆばん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::PastNegative ), "ゆばんたん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Past ), "ゆらん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::NonPastPolite ), "ゆばびーん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::YesNoInterrogative ), "ゆぶみ".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::WhInterrogative ), "ゆぶが".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Honorific ), "ゆびみせーん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Imperative ), "ゆべー".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Prohibitive ), "ゆぶな".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Desiderative ), "ゆびぶさん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::AttributiveNonPast ), "ゆぶる".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Progressive ), "ゆろーん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Continuative ), "ゆり".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Causative ), "ゆばすん".to_string()); + assert_eq!(conjugate_verb("ゆぶん", VerbType::I7, VerbConjugation::Passive ), "ゆばりゆん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPast ), "ゆむん".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::NonPastNegative ), "ゆまん".to_string()); @@ -109,7 +189,46 @@ mod tests { assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Imperative ), "ゆめー".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Prohibitive ), "ゆむな".to_string()); assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Desiderative ), "ゆみぶさん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::AttributiveNonPast ), "ゆむる".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Progressive ), "ゆろーん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Continuative ), "ゆり".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Causative ), "ゆますん".to_string()); + assert_eq!(conjugate_verb("ゆむん", VerbType::I8, VerbConjugation::Passive ), "ゆまりゆん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::NonPast ), "にんじゅん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::NonPastNegative ), "にんらん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::PastNegative ), "にんらんたん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Past ), "にんたん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::NonPastPolite ), "にんじゃびーん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::YesNoInterrogative ), "にんじゅみ".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::WhInterrogative ), "にんじゅが".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Honorific ), "にんじみせーん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Imperative ), "にんれー".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Prohibitive ), "にんるな".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Desiderative ), "にんじぶさん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::AttributiveNonPast ), "にんじゅる".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Progressive ), "にんとーん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Continuative ), "にんてぃ".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Causative ), "にんらすん".to_string()); + assert_eq!(conjugate_verb("にんじゅん", VerbType::I9, VerbConjugation::Passive ), "にんらりゆん".to_string()); + + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::NonPast ), "んんじゅん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::NonPastNegative ), "んんらん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::PastNegative ), "んんらんたん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Past ), "んんちゃん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::NonPastPolite ), "んんじゃびーん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::YesNoInterrogative ), "んんじゅみ".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::WhInterrogative ), "んんじゅが".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Honorific ), "んんじみせーん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Imperative ), "んんれー".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Prohibitive ), "んんるな".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Desiderative ), "んんじぶさん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::AttributiveNonPast ), "んんじゅる".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Progressive ), "んんちょーん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Continuative ), "んんち".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Causative ), "んんらすん".to_string()); + assert_eq!(conjugate_verb("んんじゅん", VerbType::I10, VerbConjugation::Passive ), "んんらりゆん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPast ), "とぅいん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::NonPastNegative ), "とぅらん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::PastNegative ), "とぅらんたん".to_string()); @@ -139,7 +258,7 @@ I1 っあっちゅん 歩ちゅん I2 -くうじゅん 漕じゅん +くーじゅん 漕じゅん っゐーじゅん 泳じゅん っおーじゅん 扇じゅん From b7adc6148530f86d1d00f6225bd992f6690cc150 Mon Sep 17 00:00:00 2001 From: James Haver Date: Thu, 24 May 2018 21:39:34 +0800 Subject: [PATCH 08/16] Add tests to Okinawan --- src/okinawan/lib.rs | 57 +++++++++++--- src/okinawan/tests.rs | 178 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 215 insertions(+), 20 deletions(-) diff --git a/src/okinawan/lib.rs b/src/okinawan/lib.rs index 6c24e31..52f23f1 100644 --- a/src/okinawan/lib.rs +++ b/src/okinawan/lib.rs @@ -23,6 +23,38 @@ pub enum VerbType { IV } +/* +終止類 + 断定非過去 + 断定過去 + 命令 + 禁止 + 意志 + 推量 + 強調 + 疑問 + 過去疑問 + +接続類 + 連体非過去 + 連体過去 + 中止 + 仮定 + 理由 + +派生類 + 否定 + 丁寧 + 使役 + 受身 + 可能 + 尊敬 + 継続 + 希望 + のだ +*/ + + pub enum VerbStem { // 非過去否定 Base, // 基本語幹 base @@ -31,6 +63,7 @@ pub enum VerbStem { // 基本語幹+i : 命令形 い列 // 基本語幹+u : na(な。禁止), ka(まで), kazirii(まで・かぎり) う列 + Connective, // 連用語幹 connective // 連用語幹+i : ga(〜しに), ciroo(〜しそう), uusuN(〜できる), busaN(〜したい) // い列 // 連用語幹+(j)abiiN/ibiiN : // あ列 い列 @@ -64,7 +97,6 @@ pub enum VerbConjugation { // ClauseEnding, // i, does/ and 連用形 // Connective, // (y)a - YesNoInterrogative, // ~mi WhInterrogative, // ~ga @@ -72,16 +104,16 @@ pub enum VerbConjugation { Honorific, Potential, // able to ~juusun Desiderative, // desire, want to - Imperative, + Imperative, // ~ee Prohibitive, // prohibitive - Volitional, + Volitional, // ~ra Causative, // ~sun Passive, // riiN rijuN Continuative, // ti form AttributiveNonPast, // N -> ru AttributivePast, // N -> ru - Progressive, // + Progressive, // ~oon, ~een Gerund, // 音便語幹+i : 〜して, ti ティ形 NonPastPolite, // biin @@ -104,7 +136,8 @@ pub fn base_stem(verb: &str, vt: VerbType) -> String { use self::VerbType::*; match vt { IV => "".to_string(), - III | II1 | II2 | II3 | II4 => format!("{}ら", bare_stem), + III => format!("{}ら", remove_last_mora(verb)), + II1 | II2 | II3 | II4 => format!("{}ら", bare_stem), I1 => format!("{}か", bare_stem), I2 => format!("{}が", bare_stem), I3 => format!("{}た", bare_stem), @@ -123,7 +156,8 @@ pub fn connective_stem(verb: &str, vt: VerbType) -> String { use self::VerbType::*; match vt { IV => "".to_string(), - III | II1 | II2 | II3 | II4 => format!("{}あ", bare_stem), + III => format!("{}あ", remove_last_mora(verb)), + II1 | II2 | II3 | II4 => format!("{}あ", bare_stem), I1 => format!("{}ちゃ", bare_stem), I2 => format!("{}じゃ", bare_stem), I3 => format!("{}ちゃ", bare_stem), @@ -142,7 +176,8 @@ pub fn derivative_stem(verb: &str, vt: VerbType) -> String { use self::VerbType::*; match vt { IV => "".to_string(), - III | II1 | II2 | II3 | II4 => format!("{}あ", bare_stem), + III => remove_last_mora(verb), + II1 | II2 | II3 | II4 => format!("{}あ", bare_stem), I1 => format!("{}ちゃ", bare_stem), I2 => format!("{}じゃ", bare_stem), I3 => format!("{}ちゃ", bare_stem), @@ -161,8 +196,7 @@ pub fn euphonic_stem(verb: &str, vt: VerbType) -> String { use self::VerbType::*; match vt { IV => "".to_string(), - III => format!("{}た", bare_stem), - // II1 | II2 | II3 | II4 => format!("{}ら", bare_stem), + III => format!("{}た", remove_last_mora(verb)), II1 => format!("{}った", bare_stem), II2 => format!("{}た", bare_stem), II3 => format!("{}っちゃ", bare_stem), @@ -225,13 +259,16 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> match vt { I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | I9 | I10 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "ん"), II1 | II2 | II3 | II4 => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "い"), "ん"), + III => verb.to_string(), _ => format!("{}{}", replace_last_with_vowel(&derivative_stem(verb, vt), "う"), "ん"), }, NonPastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "ん"), PastNegative => format!("{}{}", replace_last_with_vowel(&base_stem(verb, vt), "あ"), "んたん"), Past => format!("{}{}", replace_last_with_vowel(&euphonic_stem(verb, vt), "あ"), "ん"), + AttributivePast => format!("{}{}", replace_last_with_vowel(&euphonic_stem(verb, vt), "あ"), "る"), NonPastPolite => match vt { II1 | II2 | II3 | II4 => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "びーん"), // やびーん + III => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "い"), "びーん"), _ => format!("{}{}", replace_last_with_vowel(&connective_stem(verb, vt), "あ"), "びーん"), } @@ -267,6 +304,8 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> Causative => format!("{}{}", &base_stem(verb, vt), "すん"), Passive => format!("{}{}", &base_stem(verb, vt), "りゆん"), // りーん + Volitional => replace_last_with_vowel(&base_stem(verb, vt), "あ"), // let's + _ => verb.to_string() } } diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs index b201859..87c7491 100644 --- a/src/okinawan/tests.rs +++ b/src/okinawan/tests.rs @@ -80,7 +80,8 @@ mod tests { assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Imperative ), "かけー".to_string()); assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Prohibitive ), "かくな".to_string()); assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Desiderative ), "かちぶさん".to_string()); - + assert_eq!(conjugate_verb("かちゅん", VerbType::I1, VerbConjugation::Volitional ), "かか".to_string()); + assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::NonPast ), "くーじゅん".to_string()); assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::NonPastNegative ), "くーがん".to_string()); assert_eq!(conjugate_verb("くーじゅん", VerbType::I2, VerbConjugation::PastNegative ), "くーがんたん".to_string()); @@ -240,17 +241,169 @@ mod tests { assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Imperative ), "とぅれー".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Prohibitive ), "とぅるな".to_string()); // とぅんな assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Desiderative ), "とぅいぶさん".to_string()); - assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::AttributiveNonPast ), "とぅいる".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Progressive ), "とぅとーん".to_string()); // とぅろーん assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Continuative ), "とぅてぃ".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Causative ), "とぅらすん".to_string()); assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Passive ), "とぅらりゆん".to_string()); + assert_eq!(conjugate_verb("とぅいん", VerbType::II2, VerbConjugation::Volitional ), "とぅら".to_string()); + + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::NonPast ), "ぬぶいん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::NonPastNegative ), "ぬぶらん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::PastNegative ), "ぬぶらんたん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Past ), "ぬぶたん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::NonPastPolite ), "ぬぶいびーん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::YesNoInterrogative ), "ぬぶいみ".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::WhInterrogative ), "ぬぶいが".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Honorific ), "ぬぶいみせーん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Imperative ), "ぬぶれー".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Prohibitive ), "ぬぶるな".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Desiderative ), "ぬぶいぶさん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::AttributiveNonPast ), "ぬぶいる".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Progressive ), "ぬぶとーん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Continuative ), "ぬぶてぃ".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Causative ), "ぬぶらすん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Passive ), "ぬぶらりゆん".to_string()); + assert_eq!(conjugate_verb("ぬぶいん", VerbType::II2, VerbConjugation::Volitional ), "ぬぶら".to_string()); + + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::NonPast ), "もーいん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::NonPastNegative ), "もーらん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::PastNegative ), "もーらんたん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Past ), "もーたん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::NonPastPolite ), "もーいびーん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::YesNoInterrogative ), "もーいみ".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::WhInterrogative ), "もーいが".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Honorific ), "もーいみせーん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Imperative ), "もーれー".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Prohibitive ), "もーるな".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Desiderative ), "もーいぶさん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::AttributiveNonPast ), "もーいる".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Progressive ), "もーとーん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Continuative ), "もーてぃ".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Causative ), "もーらすん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Passive ), "もーらりゆん".to_string()); + assert_eq!(conjugate_verb("もーいん", VerbType::II2, VerbConjugation::Volitional ), "もーら".to_string()); + + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::NonPast ), "ちいん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::NonPastNegative ), "ちらん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::PastNegative ), "ちらんたん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Past ), "ちっちゃん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::NonPastPolite ), "ちいびーん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::YesNoInterrogative ), "ちいみ".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::WhInterrogative ), "ちいが".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Honorific ), "ちいみせーん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Imperative ), "ちれー".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Prohibitive ), "ちるな".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Desiderative ), "ちいぶさん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::AttributiveNonPast ), "ちいる".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Progressive ), "ちっちょーん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Continuative ), "ちっち".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Causative ), "ちらすん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Passive ), "ちらりゆん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II3, VerbConjugation::Volitional ), "ちら".to_string()); + + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::NonPast ), "ちいん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::NonPastNegative ), "ちらん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::PastNegative ), "ちらんたん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Past ), "ちちゃん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::NonPastPolite ), "ちいびーん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::YesNoInterrogative ), "ちいみ".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::WhInterrogative ), "ちいが".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Honorific ), "ちいみせーん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Imperative ), "ちれー".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Prohibitive ), "ちるな".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Desiderative ), "ちいぶさん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::AttributiveNonPast ), "ちいる".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Progressive ), "ちちょーん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Continuative ), "ちち".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Causative ), "ちらすん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Passive ), "ちらりゆん".to_string()); + assert_eq!(conjugate_verb("ちいん", VerbType::II4, VerbConjugation::Volitional ), "ちら".to_string()); + + /* + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::NonPast ), "あん".to_string()); + // assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::NonPastNegative ), "ねーん".to_string()); + // assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::PastNegative ), "ねーんたん".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Past ), "あたん".to_string()); + + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::NonPastPolite ), "ちいびーん".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::YesNoInterrogative ), "ちいみ".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::WhInterrogative ), "ちいが".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Honorific ), "ちいみせーん".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Imperative ), "ちれー".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Prohibitive ), "ちるな".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Desiderative ), "ちいぶさん".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::AttributiveNonPast ), "ある".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Progressive ), "ちちょーん".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Continuative ), "ちち".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Causative ), "ちらすん".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Passive ), "ちらりゆん".to_string()); + assert_eq!(conjugate_verb("あん", VerbType::III, VerbConjugation::Volitional ), "あら".to_string()); + */ + + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::NonPast ), "うん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::NonPastNegative ), "うらん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::PastNegative ), "うらんたん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Past ), "うたん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::NonPastPolite ), "ういびーん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::YesNoInterrogative ), "うみ".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::WhInterrogative ), "うが".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Honorific ), "ういみせーん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Imperative ), "うれー".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Prohibitive ), "うるな".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Desiderative ), "ういぶさん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::AttributiveNonPast ), "うる".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Progressive ), "うとーん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Continuative ), "うてぃ".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Causative ), "うらすん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Passive ), "うらりゆん".to_string()); + assert_eq!(conjugate_verb("うん", VerbType::III, VerbConjugation::Volitional ), "うら".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::NonPast ), "っうん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::NonPastNegative ), "っうらん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::PastNegative ), "っうらんたん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Past ), "っうたん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::NonPastPolite ), "っういびーん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::YesNoInterrogative ), "っうみ".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::WhInterrogative ), "っうが".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Honorific ), "っういみせーん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Imperative ), "っうれー".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Prohibitive ), "っうるな".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Desiderative ), "っういぶさん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::AttributiveNonPast ), "っうる".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Progressive ), "っうとーん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Continuative ), "っうてぃ".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Causative ), "っうらすん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Passive ), "っうらりゆん".to_string()); + assert_eq!(conjugate_verb("っうん", VerbType::III, VerbConjugation::Volitional ), "っうら".to_string()); } } /* +連用形 + +あてぃ +あい + +erminal あん +an +"there is" あたん +atan +"there was" ねーん +neen +ねーらん +neeran +"there isn't" ねーんたん +neentan +ねーらんたん +neerantan +"there wasn't" +Attributive ある +aru あたる +ataru Same as +above. Same as +above. + I1 かちゅん 書ちゅん ちちゅん 聞ちゅん @@ -308,21 +461,24 @@ II2 かいん 刈いん ぬぶいん 登いん っあらいん 洗いん +もーいん 舞いん II3 -きーん 蹴ーん -っいーん 入ーん -ひーん 放ーん -ちーん 切ーん +きいん 蹴いん +っいいん 入いん +ひいん 放いん +ちいん 切いん II4 -にーん 煮ーん -ちーん 着ーん -っいーん 言ーん -いーん 座ーん +にいん 煮いん +ちいん 着いん +っいいん 言いん +いいん 座いん III あん 有ん うん 居ん -やん  +やん + +IV irregulars */ From 9a4903a0a511d02b79d0bfb4e4a4d255b88b7aea Mon Sep 17 00:00:00 2001 From: James Haver Date: Fri, 25 May 2018 16:45:31 +0800 Subject: [PATCH 09/16] Update --- src/kansai.rs | 22 +-- src/lib.rs | 9 +- src/okinawan/lib.rs | 3 +- src/okinawan/tests.rs | 2 +- src/standard/lib.rs | 418 +++++++++++++++++++++++++++++++++++++++++ src/standard/tables.rs | 3 + src/standard/util.rs | 51 +++++ src/util.rs | 8 + 8 files changed, 502 insertions(+), 14 deletions(-) create mode 100644 src/standard/lib.rs create mode 100644 src/standard/tables.rs create mode 100644 src/standard/util.rs diff --git a/src/kansai.rs b/src/kansai.rs index b749c83..f3b69b3 100644 --- a/src/kansai.rs +++ b/src/kansai.rs @@ -1,5 +1,5 @@ use standard; -use standard::{VerbStem}; +use standard::lib::{VerbStem}; use std::cmp::Ordering::{Equal,Greater,Less}; @@ -92,7 +92,7 @@ fn lookup_table(s: &str, table: &'static [(&str,&str)]) -> Option { pub enum VerbType { V1Iru, // いる 上一段 かみいちだん V1Eru, // える 下一段 しもいちだん - Standard(standard::VerbType) + Standard(standard::lib::VerbType) } pub enum VerbConjugation { @@ -104,11 +104,11 @@ pub enum VerbConjugation { // pub enum VerbStem = standard::VerbStem; -pub fn verb_stem(word: &str, verb_stem: standard::VerbStem, verb_type: VerbType) -> String { +pub fn verb_stem(word: &str, verb_stem: standard::lib::VerbStem, verb_type: VerbType) -> String { match verb_type { - VerbType::V1Iru => standard::verb_stem(word,verb_stem,standard::VerbType::V1), - VerbType::V1Eru => standard::verb_stem(word,verb_stem,standard::VerbType::V1), - VerbType::Standard(vt) => standard::verb_stem(word,verb_stem,vt) + VerbType::V1Iru => standard::lib::verb_stem(word,verb_stem,standard::lib::VerbType::V1), + VerbType::V1Eru => standard::lib::verb_stem(word,verb_stem,standard::lib::VerbType::V1), + VerbType::Standard(vt) => standard::lib::verb_stem(word,verb_stem,vt) } } @@ -119,7 +119,7 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> VerbConjugation::NonPastNegative => match vt { VerbType::V1Iru => { - let verb_stem = standard::verb_stem(verb, VerbStem::Irrealis, standard::VerbType::V1); + let verb_stem = standard::lib::verb_stem(verb, VerbStem::Irrealis, standard::lib::VerbType::V1); if verb_stem.chars().count() == 1 { Some(format!("{}{}", verb_stem, "いへん")) } else { @@ -127,14 +127,14 @@ pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> } }, VerbType::V1Eru => { - let verb_stem = standard::verb_stem(verb, VerbStem::Irrealis, standard::VerbType::V1); + let verb_stem = standard::lib::verb_stem(verb, VerbStem::Irrealis, standard::lib::VerbType::V1); if verb_stem.chars().count() == 1 { Some(format!("{}{}", verb_stem, "えへん")) } else { Some(format!("{}{}", verb_stem, "へん")) } }, - VerbType::Standard(vt) => Some(format!("{}{}", standard::verb_stem(verb, VerbStem::Irrealis, vt), "へん")), + VerbType::Standard(vt) => Some(format!("{}{}", standard::lib::verb_stem(verb, VerbStem::Irrealis, vt), "へん")), }, @@ -148,8 +148,8 @@ mod tests { #[test] fn test_conjugate_verb() { - assert_eq!(conjugate_verb("いく",VerbType::Standard(standard::VerbType::V5KS),VerbConjugation::NonPastNegative), Some("いかへん".to_string())); - assert_eq!(conjugate_verb("行く",VerbType::Standard(standard::VerbType::V5KS),VerbConjugation::NonPastNegative), Some("行かへん".to_string())); + assert_eq!(conjugate_verb("いく",VerbType::Standard(standard::lib::VerbType::V5KS),VerbConjugation::NonPastNegative), Some("いかへん".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::Standard(standard::lib::VerbType::V5KS),VerbConjugation::NonPastNegative), Some("行かへん".to_string())); assert_eq!(conjugate_verb("食べる",VerbType::V1Eru,VerbConjugation::NonPastNegative), Some("食べへん".to_string())); assert_eq!(conjugate_verb("見る",VerbType::V1Iru,VerbConjugation::NonPastNegative), Some("見いへん".to_string())); diff --git a/src/lib.rs b/src/lib.rs index 4e297d8..18cf4cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,16 @@ mod util; -pub mod standard; + pub mod kansai; + pub mod okinawan { pub mod lib; mod tables; mod util; mod tests; } + +pub mod standard { + pub mod lib; + mod tables; + mod util; +} diff --git a/src/okinawan/lib.rs b/src/okinawan/lib.rs index 52f23f1..1231861 100644 --- a/src/okinawan/lib.rs +++ b/src/okinawan/lib.rs @@ -1,4 +1,5 @@ -use util::{truncate_chars, split_chars_at, lookups_string}; +// use util::{truncate_chars, split_chars_at, lookups_string}; +use util::{split_chars_at, lookups_string}; use okinawan::tables::{HIRAGANA_TO_ROW_TABLE}; use okinawan::util::{remove_last_mora}; // use okinawan::util as uu; diff --git a/src/okinawan/tests.rs b/src/okinawan/tests.rs index 87c7491..d0a74bd 100644 --- a/src/okinawan/tests.rs +++ b/src/okinawan/tests.rs @@ -1,7 +1,7 @@ +#[cfg(test)] use okinawan::util::{two_char_is_single_mora, remove_last_mora}; use okinawan::lib::*; -#[cfg(test)] mod tests { use super::*; #[test] diff --git a/src/standard/lib.rs b/src/standard/lib.rs new file mode 100644 index 0000000..92f1d53 --- /dev/null +++ b/src/standard/lib.rs @@ -0,0 +1,418 @@ +use standard::tables::{ROMAJI_TO_HIRAGANA_TABLE}; +use std::str::FromStr; +pub use standard::util::romaji_to_hiragana; + +#[derive(Debug, PartialEq)] +pub enum VerbType { + V1, + V5Aru, + V5B, + V5G, + V5K, + V5KS, + V5M, + V5N, + V5R, + V5RI, + V5S, + V5T, + V5U, + V5US, + VK, + VS +} + +impl FromStr for VerbType { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "v1" => Ok(VerbType::V1), + "v5aru" => Ok(VerbType::V5Aru), + "v5b" => Ok(VerbType::V5B), + "v5g" => Ok(VerbType::V5G), + "v5k" => Ok(VerbType::V5K), + "v5k-s" => Ok(VerbType::V5KS), + "v5m" => Ok(VerbType::V5M), + "v5n" => Ok(VerbType::V5N), + "v5r" => Ok(VerbType::V5R), + "v5r-i" => Ok(VerbType::V5RI), + "v5s" => Ok(VerbType::V5S), + "v5t" => Ok(VerbType::V5T), + "v5u" => Ok(VerbType::V5U), + "v5u-s" => Ok(VerbType::V5US), + "vk" => Ok(VerbType::VK), + "vs" => Ok(VerbType::VS), + _ => Err(s.to_string()) + } + } +} + +#[derive(Debug, PartialEq)] +pub enum VerbStem { + Dictionary, // 辞書形, u form + Conjunctive, // 連用形, i form + Irrealis, // 未然形, a form + Imperative, // 命令形, e form + Hypothetical, // 推量形, o form + Te, // テ形, te form + Ta, // タ形, ta form +} + +#[derive(Debug, PartialEq)] +pub enum VerbConjugation { + // 辞書形 Dictionary + // NonPast + // な negative imperative NegativeImperative + // の emphatic/informal Emphatic + // こと nominalize Nominalize + // ことができる able to do PotentialI + // ことがある occasional occurence Occasional + // ことにする decide to do Decisive + // ことになる as a result Resultative + // まえに before ~ing Before + // がはやいか as soon as AsSoonAs + // ともなく without intent Unintentionally + // べきだ idealistic should IdealisticalShould + // まい formal negative volitional NegativeVolitionalFormal + // みたい it seems that EvidentialI + // そう I've heard that EvidentialII + // らしい apparently い-adj EvidentialIII + NonPast, + + // 連用形 conjunctive + // たい desire い-adj DesireI + // たがる desire 五段動詞 DerireII + // はしない strong negative desire DesireNegative + // ながら while ~ing While + // がち tends to TendsTo + // かた way of ~ing WayOf + // そう it looks like EvidentialIV + // やがる yakuza super rude Rude + // にくい difficult to DifficultTo + // やすい easy to EasyTo + // すぎる too much TooMuch + // に+verb of motion go and do Directional + // polite + // ます non past NonPastPolite + // ません negative NonPastNegativePolite + // ました past PastPolite + // ませんでした past negative PastNegativePolite + // ましょう volitional Volitional + // まして connective (rare) ConnectivePolite + // ますれば conditional (rare) ConditionalPolite + // なさい imperative ImperativeI + // な imperative ImperativeII + // お+連用形+になる honorific verb form HonorificI + // お+連用形+なさる honorific verb form HonorificII + // お+連用形+する humble verb form Humble + + While, + DifficultTo, + EasyTo, + TooMuch, + NonPastPolite, + NonPastNegativePolite, + PastPolite, + PastNegativePolite, + VolitionalPolite, + HonorificI, + HonorificII, + Humble, + + // 未然形 + // ない non past negative い形容詞 + // ないで without ing + // なくて negative テ形 + // なかった past negative タ形 + // なければ negative conditional + // なかろう negative volitional + // なくていい don't have to (informal) + // なくてはいけない must (compulsion) + // なくちゃ must (compulsion) + // なければならない must (obligation) + // なきゃ must (obligation) + // なかればいけない must (obligation) + // ず negative (old) + // ずに without ~ing (formal) + // れる 五段 passive/honorific + // られる 一段 passive/honorific + // せる/す 五段 caustive + // させる/さす 一段 causative + // せられる/される 五段 causative passive + // させられる 一段 caustive passive + + NonPastNegative, + PastNegative, + Passive, + Causative, + CausativePassive, + + // 命令形 + // ば 五段 conditional + // れば 一段 conditional + // る 五段 potential + // られる/れる 一段 potential + // ろ 一段 imperative + // conditional + よかった I wish that + + Conditional, + + // 推量形 + // う 五段 volitional/tenative/presumptive + // よう 一段 volitional/tenative/presumptive + + Volitional, + + // テ形 + // あげる・くれる・もらう benefit being given or received + // いく continuing change of state + // いる continuous/habitual action + // おく preparatory action + // しまう completed acttion + // よかった I'm glad that + // みる try and see + // ほしい favor request + // ある changed state + // から after ~ing + // くる state change + // はいけない must not + // はならない must not + // はだめ must not + // もかまわない permissive + // もいい permissive + // も even if + // すみません apologetic + + Try, + + // タ形 + // から reason for next clause + // りする etc., repeated opposite + // ら if/when ... + // ばかり just happened + // ほうがいい suggestive + + Just +} + +pub fn verb_stem(word: &str, verb_stem: VerbStem, verb_type: VerbType) -> String { + if word.len() > 0 { + let s = truncate_chars(word, (word.chars().count() - 1)).to_string(); + + match (verb_type,verb_stem) { + (_, VerbStem::Dictionary) => word.to_string(), + + (VerbType::V1,VerbStem::Conjunctive) => s + "ない", + (VerbType::V1,VerbStem::Irrealis) => s, + // (VerbType::V1,VerbStem::Imperative) => (s + "よ").to_string(), + (VerbType::V1,VerbStem::Imperative) => s, + (VerbType::V1,VerbStem::Hypothetical) => s , // + "れ", + (VerbType::V1,VerbStem::Te) => s + "て", + (VerbType::V1,VerbStem::Ta) => s + "た", + + (VerbType::V5Aru,VerbStem::Conjunctive) => s + "り", + (VerbType::V5Aru,VerbStem::Irrealis) => "ない".to_string(), + (VerbType::V5Aru,VerbStem::Imperative) => s + "れ", + (VerbType::V5Aru,VerbStem::Hypothetical) => s + "ろ", + (VerbType::V5Aru,VerbStem::Te) => s + "って", + (VerbType::V5Aru,VerbStem::Ta) => s + "った", + + (VerbType::V5B,VerbStem::Conjunctive) => s + "び", + (VerbType::V5B,VerbStem::Irrealis) => s + "ば", + (VerbType::V5B,VerbStem::Imperative) => s + "べ", + (VerbType::V5B,VerbStem::Hypothetical) => s + "ぼ", + (VerbType::V5B,VerbStem::Te) => s + "んで", + (VerbType::V5B,VerbStem::Ta) => s + "んだ", + + (VerbType::V5G,VerbStem::Conjunctive) => s + "ぎ", + (VerbType::V5G,VerbStem::Irrealis) => s + "が", + (VerbType::V5G,VerbStem::Imperative) => s + "げ", + (VerbType::V5G,VerbStem::Hypothetical) => s + "ご", + (VerbType::V5G,VerbStem::Te) => s + "いで", + (VerbType::V5G,VerbStem::Ta) => s + "いだ", + + (VerbType::V5K,VerbStem::Conjunctive) => s + "き", + (VerbType::V5K,VerbStem::Irrealis) => s + "か", + (VerbType::V5K,VerbStem::Imperative) => s + "け", + (VerbType::V5K,VerbStem::Hypothetical) => s + "こ", + (VerbType::V5K,VerbStem::Te) => s + "いて", + (VerbType::V5K,VerbStem::Ta) => s + "いた", + + (VerbType::V5KS,VerbStem::Conjunctive) => s + "き", + (VerbType::V5KS,VerbStem::Irrealis) => s + "か", + (VerbType::V5KS,VerbStem::Imperative) => s + "け", + (VerbType::V5KS,VerbStem::Hypothetical) => s + "こ", + (VerbType::V5KS,VerbStem::Te) => s + "って", + (VerbType::V5KS,VerbStem::Ta) => s + "った", + + (VerbType::V5M,VerbStem::Conjunctive) => s + "み", + (VerbType::V5M,VerbStem::Irrealis) => s + "ま", + (VerbType::V5M,VerbStem::Imperative) => s + "め", + (VerbType::V5M,VerbStem::Hypothetical) => s + "も", + (VerbType::V5M,VerbStem::Te) => s + "んで", + (VerbType::V5M,VerbStem::Ta) => s + "んだ", + + (VerbType::V5N,VerbStem::Conjunctive) => s + "に", + (VerbType::V5N,VerbStem::Irrealis) => s + "な", + (VerbType::V5N,VerbStem::Imperative) => s + "ね", + (VerbType::V5N,VerbStem::Hypothetical) => s + "の", + (VerbType::V5N,VerbStem::Te) => s + "んで", + (VerbType::V5N,VerbStem::Ta) => s + "んだ", + + (VerbType::V5R,VerbStem::Conjunctive) => s + "り", + (VerbType::V5R,VerbStem::Irrealis) => s + "ら", + (VerbType::V5R,VerbStem::Imperative) => s + "れ", + (VerbType::V5R,VerbStem::Hypothetical) => s + "ろ", + (VerbType::V5R,VerbStem::Te) => s + "って", + (VerbType::V5R,VerbStem::Ta) => s + "った", + + (VerbType::V5RI,VerbStem::Te) => s + "って", + + (VerbType::V5S,VerbStem::Conjunctive) => s + "し", + (VerbType::V5S,VerbStem::Irrealis) => s + "さ", + (VerbType::V5S,VerbStem::Imperative) => s + "せ", + (VerbType::V5S,VerbStem::Hypothetical) => s + "そ", + (VerbType::V5S,VerbStem::Te) => s + "して", + (VerbType::V5S,VerbStem::Ta) => s + "した", + + (VerbType::V5T,VerbStem::Conjunctive) => s + "ち", + (VerbType::V5T,VerbStem::Irrealis) => s + "た", + (VerbType::V5T,VerbStem::Imperative) => s + "て", + (VerbType::V5T,VerbStem::Hypothetical) => s + "と", + (VerbType::V5T,VerbStem::Te) => s + "って", + (VerbType::V5T,VerbStem::Ta) => s + "った", + + (VerbType::V5U,VerbStem::Te) => s + "って", + (VerbType::V5US,VerbStem::Te) => s + "うて", + (VerbType::VK,VerbStem::Te) => s + "て", + (VerbType::VS,VerbStem::Te) => "して".to_string(), + + (VerbType::V5U,VerbStem::Ta) => s + "った", + (VerbType::V5US,VerbStem::Ta) => s + "うた", + (VerbType::VK,VerbStem::Ta) => s + "た", + (VerbType::VS,VerbStem::Ta) => "した".to_string(), + + (_,_) => s + + } + } else { + word.to_string() + } +} + + +// assumes that verb is in dictionary form +pub fn conjugate_verb(verb: &str, vt: VerbType, conjugation: VerbConjugation) -> Option { + match conjugation { + // 辞書形 + VerbConjugation::NonPast => Some(verb.to_string()), + + // 連用形 + VerbConjugation::While => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "ながら")), + VerbConjugation::DifficultTo => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "にくい")), + VerbConjugation::EasyTo => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "やすい")), + VerbConjugation::TooMuch => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "すぎる")), + VerbConjugation::NonPastPolite => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "ます")), + VerbConjugation::NonPastNegativePolite => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "ません")), + VerbConjugation::PastPolite => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "ました")), + VerbConjugation::PastNegativePolite => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "ませんでした")), + VerbConjugation::VolitionalPolite => Some(format!("{}{}", verb_stem(verb, VerbStem::Conjunctive, vt), "ましょう")), + VerbConjugation::HonorificI => Some(format!("{}{}{}", "お", verb_stem(verb, VerbStem::Conjunctive, vt), "になる")), + VerbConjugation::HonorificII => Some(format!("{}{}{}", "お", verb_stem(verb, VerbStem::Conjunctive, vt), "なさる")), + VerbConjugation::Humble => Some(format!("{}{}{}", "お", verb_stem(verb, VerbStem::Conjunctive, vt), "する")), + + // 未然形 + VerbConjugation::NonPastNegative => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "ない")), + VerbConjugation::PastNegative => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "なかった")), + + VerbConjugation::Passive => + match vt { + VerbType::V1 => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "られる")), + _ => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "れる")) + }, + VerbConjugation::Causative => + match vt { + VerbType::V1 => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "させる")), + _ => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "せる")) + }, + VerbConjugation::CausativePassive => + match vt { + VerbType::V1 => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "させられる")), + _ => Some(format!("{}{}", verb_stem(verb, VerbStem::Irrealis, vt), "せられる")) + }, + + // 命令形 + + VerbConjugation::Conditional => + match vt { + VerbType::V1 => Some(format!("{}{}", verb_stem(verb, VerbStem::Imperative, vt), "れば")), + _ => Some(format!("{}{}", verb_stem(verb, VerbStem::Imperative, vt), "ば")) + }, + + // 推量形 + VerbConjugation::Volitional => + match vt { + VerbType::V1 => Some(format!("{}{}", verb_stem(verb, VerbStem::Hypothetical, vt), "よう")), + _ => Some(format!("{}{}", verb_stem(verb, VerbStem::Hypothetical, vt), "う")) + }, + + // テ形 + VerbConjugation::Try => Some(format!("{}{}", verb_stem(verb, VerbStem::Te, vt), "みる")), + + // タ形 + VerbConjugation::Just => Some(format!("{}{}", verb_stem(verb, VerbStem::Ta, vt), "ばかり")), + + // _ => None + } +} + +pub fn truncate_chars(s: &str, max_chars: usize) -> &str { + match s.char_indices().nth(max_chars) { + None => s, + Some((idx, _)) => &s[..idx], + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn truncate_utf8() { + let string = "いく".to_string().to_owned(); + let char_count = string.chars().count(); + let left_side = truncate_chars(&string, char_count - 1); + assert_eq!(char_count, 2); + assert_eq!(left_side, "い"); + } + + #[test] + fn verb_type_from_str() { + assert_eq!(VerbType::from_str("v1"), Ok(VerbType::V1)); + assert_ne!(VerbType::from_str("v1 "), Ok(VerbType::V1)); + } + + #[test] + fn test_conjugate_verb() { + assert_eq!(conjugate_verb("いく",VerbType::V5KS,VerbConjugation::NonPast), Some("いく".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::Try), Some("行ってみる".to_string())); + assert_eq!(conjugate_verb("いく",VerbType::V5KS,VerbConjugation::Try), Some("いってみる".to_string())); + + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::NonPastPolite), Some("行きます".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::NonPastNegativePolite), Some("行きません".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::PastPolite), Some("行きました".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::PastNegativePolite), Some("行きませんでした".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::VolitionalPolite), Some("行きましょう".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::Conditional), Some("行けば".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::Volitional), Some("行こう".to_string())); + assert_eq!(conjugate_verb("行く",VerbType::V5KS,VerbConjugation::Just), Some("行ったばかり".to_string())); + + assert_eq!(conjugate_verb("食べる",VerbType::V1,VerbConjugation::Passive), Some("食べられる".to_string())); + assert_eq!(conjugate_verb("食べる",VerbType::V1,VerbConjugation::Causative), Some("食べさせる".to_string())); + assert_eq!(conjugate_verb("食べる",VerbType::V1,VerbConjugation::CausativePassive), Some("食べさせられる".to_string())); + assert_eq!(conjugate_verb("食べる",VerbType::V1,VerbConjugation::Conditional), Some("食べれば".to_string())); + assert_eq!(conjugate_verb("食べる",VerbType::V1,VerbConjugation::Volitional), Some("食べよう".to_string())); + assert_eq!(conjugate_verb("食べる",VerbType::V1,VerbConjugation::Just), Some("食べたばかり".to_string())); + } +} diff --git a/src/standard/tables.rs b/src/standard/tables.rs new file mode 100644 index 0000000..0118e2b --- /dev/null +++ b/src/standard/tables.rs @@ -0,0 +1,3 @@ +pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ + ("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsu","つ"),("tu","つ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("wo","を"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず"),("-","ー") +]; diff --git a/src/standard/util.rs b/src/standard/util.rs new file mode 100644 index 0000000..8ce3b39 --- /dev/null +++ b/src/standard/util.rs @@ -0,0 +1,51 @@ +use standard::tables::*; +use util; + +pub fn romaji_to_hiragana(romaji: &str) -> String { + let romaji_len = romaji.len(); + + let mut romaji_index = 0; + let mut window = 1; // 1 to 3 + let mut hiragana = "".to_string(); + + // convert 1 to 3 ascii chars to a single hirgana + // i is starting point, window is length to be looked up + while romaji_index < romaji_len && romaji_index + window < romaji_len + 1 { + let romaji_sub = &romaji[romaji_index .. romaji_index + window]; + + // check for gemminate consonant, if it exists, consume the char + // add a "っ" and skip the lookup + if window == 1 && romaji_index + 1 < romaji_len && util::is_consonant(romaji_sub) { + let romaji_peek_ahead = &romaji[romaji_index + 1 .. romaji_index + 2]; + + if romaji_sub == romaji_peek_ahead { + hiragana = format!("{}{}", hiragana, "っ"); + romaji_index += 1; + window = 1; + continue; + } + } + + match util::lookup(romaji_sub, ROMAJI_TO_HIRAGANA_TABLE) { + Some(hiragana_index) => { + let single_hiragana = ROMAJI_TO_HIRAGANA_TABLE[hiragana_index].1.to_string(); + hiragana = format!("{}{}", hiragana, single_hiragana); + romaji_index += window; + window = 1; + }, + None => { + // romaji_sub was not found in the table, increase the window size if it was + // not found. If the window is too large then move the index over and reset + // the window size to 1 + if window < 3 { + window += 1; + } else { + romaji_index += 1; + window = 1; + } + }, + } + } + + hiragana.to_string() +} diff --git a/src/util.rs b/src/util.rs index 1615715..66ea1a8 100644 --- a/src/util.rs +++ b/src/util.rs @@ -80,6 +80,14 @@ pub fn hiragana_merges_n(s: &str) -> bool { s.chars().count() == 1 && ["ぁ","あ","ぃ","い","ぅ","う","ぇ","え","ぉ","お","ゃ","や","ゅ","ゆ","ょ","よ","ん"].contains(&s) } +pub fn is_hiragana(c: char) -> bool { + c >= '\u{3040}' && c <= '\u{309F}' +} + +pub fn is_katakana(c: char) -> bool { + c >= '\u{30A0}' && c <= '\u{30FF}' +} + pub const HIRAGANA_TO_VOWEL_TABLE: &'static [(&str, &str)] = &[ ("ぁ","あ"),("あ","あ"),("ぃ","い"),("い","い"),("ぅ","う"),("う","う"),("ぇ","え"),("え","え"),("ぉ","お"),("お","お"),("か","あ"),("が","あ"),("き","い"),("ぎ","い"),("く","う"),("ぐ","う"),("け","え"),("げ","え"),("こ","お"),("ご","お"),("さ","あ"),("ざ","あ"),("し","い"),("じ","い"),("す","う"),("ず","う"),("せ","え"),("ぜ","え"),("そ","お"),("ぞ","お"),("た","あ"),("だ","あ"),("ち","い"),("ぢ","い"),("つ","う"),("づ","う"),("て","え"),("で","え"),("と","お"),("ど","お"),("な","あ"),("に","い"),("ぬ","う"),("ね","え"),("の","お"),("は","あ"),("ば","あ"),("ぱ","あ"),("ひ","い"),("び","い"),("ぴ","い"),("ふ","う"),("ぶ","う"),("ぷ","う"),("へ","え"),("べ","え"),("ぺ","え"),("ほ","お"),("ぼ","お"),("ぽ","お"),("ま","あ"),("み","い"),("む","う"),("め","え"),("も","お"),("ゃ","あ"),("や","あ"),("ゅ","う"),("ゆ","う"),("ょ","お"),("よ","お"),("ら","あ"),("り","い"),("る","う"),("れ","え"),("ろ","お"),("わ","あ"),("ゐ","い"),("ゑ","え"),("を","お"),("ゔ","う"),("ゕ","あ"),("ゖ","え") ]; From 7e10f661230c27897c71ae148bc7905c396308d1 Mon Sep 17 00:00:00 2001 From: James Haver Date: Fri, 25 May 2018 21:49:09 +0800 Subject: [PATCH 10/16] Trying to fix romaji to hiragana --- Cargo.toml | 1 + src/standard/lib.rs | 2 +- src/standard/util.rs | 119 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b7faca6..f4dfd55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,4 @@ version = "0.1.0" authors = ["James Haver II "] [dependencies] +unicode-segmentation = "1.2.1" \ No newline at end of file diff --git a/src/standard/lib.rs b/src/standard/lib.rs index 92f1d53..36e7c9d 100644 --- a/src/standard/lib.rs +++ b/src/standard/lib.rs @@ -1,6 +1,6 @@ use standard::tables::{ROMAJI_TO_HIRAGANA_TABLE}; use std::str::FromStr; -pub use standard::util::romaji_to_hiragana; +pub use standard::util::{romaji_to_hiragana, romaji_to_hiragana_safe}; #[derive(Debug, PartialEq)] pub enum VerbType { diff --git a/src/standard/util.rs b/src/standard/util.rs index 8ce3b39..dce2c6e 100644 --- a/src/standard/util.rs +++ b/src/standard/util.rs @@ -1,6 +1,110 @@ use standard::tables::*; use util; +pub fn is_romaji(s: &str) -> bool { + s.len() == 1 && ["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","-"].contains(&s) +} + +fn match_char(data: &char) -> bool { + match *data { + '\x01'...'\x08' | + '\u{10FFFE}'...'\u{10FFFF}' => true, + _ => false, + } +} + +fn c_to_string(cs: Vec<(usize,char)>) -> String { + cs.into_iter().map(|(_, c)| c).collect() +} + +pub fn romaji_to_hiragana_safe(romaji: &str) -> String { + let romaji_chars = romaji.chars(); + let romaji_len = romaji_chars.count(); + let romaji_indices = romaji.char_indices().collect::>(); + let ss = c_to_string(romaji_indices); + + // println!("{}", romaji_len); + // println!("{:?}", romaji_indices); + + let mut romaji_index = 0; + let mut window = 1; // 1 to 3 + let mut hiragana = "".to_string(); + + // convert 1 to 3 ascii chars to a single hirgana + // i is starting point, window is length to be looked up + while romaji_index < romaji_len && romaji_index + window < romaji_len + 1 { + let romaji_sub_vec = &romaji.chars().collect::>()[romaji_index .. romaji_index + window]; + let romaji_sub_string: String = romaji_sub_vec.into_iter().collect(); + let romaji_sub: &str = &romaji_sub_string[..]; + + let romaji_sub_vec_last = &romaji.chars().collect::>()[romaji_index + window - 1 .. romaji_index + window]; + let romaji_sub_string_last: String = romaji_sub_vec_last.into_iter().collect(); + let romaji_sub_last: &str = &romaji_sub_string_last[..]; + + if !is_romaji(romaji_sub_last) { + println!("{}",romaji_sub_last); + hiragana = format!("{}{}", hiragana, romaji_sub); + // if match_char(&romaji_sub_vec_last[0]) { + // romaji_index += 1; + // } else { + // romaji_index += 2; +// } + romaji_index += 2; + + window = 1; + + } else { + // println!("{}", romaji_sub); + // check for gemminate consonant, if it exists, consume the char + // add a "っ" and skip the lookup + if window == 1 && romaji_index + 1 < romaji_len && util::is_consonant(romaji_sub) { + let romaji_peek_ahead_vec = &romaji.chars().collect::>()[romaji_index + 1 .. romaji_index + 2]; + let romaji_peek_ahead: String = romaji_peek_ahead_vec.into_iter().collect(); + + if romaji_sub == romaji_peek_ahead { + hiragana = format!("{}{}", hiragana, "っ"); + romaji_index += 1; + window = 1; + continue; + } + } + + match util::lookup(romaji_sub, ROMAJI_TO_HIRAGANA_TABLE) { + Some(hiragana_index) => { + let single_hiragana = ROMAJI_TO_HIRAGANA_TABLE[hiragana_index].1.to_string(); + hiragana = format!("{}{}", hiragana, single_hiragana); + romaji_index += window; + window = 1; + }, + None => { + // romaji_sub was not found in the table, increase the window size if it was + // not found. If the window is too large then move the index over and reset + // the window size to 1 + if window < 3 { + // keep the unconverted character + if romaji_index + window >= romaji_len { + hiragana = format!("{}{}", hiragana, romaji_sub); + } + window += 1; + } else { + let romaji_sub_vec_first = &romaji.chars().collect::>()[romaji_index .. romaji_index + 1]; + let romaji_sub_string_first: String = romaji_sub_vec_first.into_iter().collect(); + let romaji_sub_first: &str = &romaji_sub_string_first[..]; + + hiragana = format!("{}{}", hiragana, romaji_sub_first); + romaji_index += 1; + window = 1; + } + }, + } + } + } + + hiragana.to_string() +} + + +// this expects everything to be romaji and is destructive pub fn romaji_to_hiragana(romaji: &str) -> String { let romaji_len = romaji.len(); @@ -49,3 +153,18 @@ pub fn romaji_to_hiragana(romaji: &str) -> String { hiragana.to_string() } + + +mod tests { + use super::*; + #[test] + fn test_romaji_to_hiragana_safe() { +// assert_eq!(romaji_to_hiragana_safe("arigatou"), "ありがとう".to_string()); +// assert_eq!(romaji_to_hiragana_safe("ar"), "あr".to_string()); +// assert_eq!(romaji_to_hiragana_safe("ari"), "あり".to_string()); + assert_eq!(romaji_to_hiragana_safe("あri"), "あり".to_string()); +// assert_eq!(romaji_to_hiragana_safe("hri"), "hり".to_string()); +// assert_eq!(romaji_to_hiragana_safe("sme"), "sめ".to_string()); + assert_eq!(romaji_to_hiragana_safe("wあり"), "wあり".to_string()); + } +} From d060bce7695b827e5f5f0003c58d76f4a9709939 Mon Sep 17 00:00:00 2001 From: James Haver Date: Fri, 25 May 2018 22:04:55 +0800 Subject: [PATCH 11/16] It works so far --- src/standard/util.rs | 47 +++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/standard/util.rs b/src/standard/util.rs index dce2c6e..4380184 100644 --- a/src/standard/util.rs +++ b/src/standard/util.rs @@ -1,6 +1,10 @@ use standard::tables::*; use util; +extern crate unicode_segmentation; + +use self::unicode_segmentation::UnicodeSegmentation; + pub fn is_romaji(s: &str) -> bool { s.len() == 1 && ["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","-"].contains(&s) } @@ -18,14 +22,9 @@ fn c_to_string(cs: Vec<(usize,char)>) -> String { } pub fn romaji_to_hiragana_safe(romaji: &str) -> String { - let romaji_chars = romaji.chars(); - let romaji_len = romaji_chars.count(); - let romaji_indices = romaji.char_indices().collect::>(); - let ss = c_to_string(romaji_indices); + let romaji_chars = UnicodeSegmentation::graphemes(romaji, true).collect::>(); + let romaji_len = romaji_chars.len(); - // println!("{}", romaji_len); - // println!("{:?}", romaji_indices); - let mut romaji_index = 0; let mut window = 1; // 1 to 3 let mut hiragana = "".to_string(); @@ -33,23 +32,17 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { // convert 1 to 3 ascii chars to a single hirgana // i is starting point, window is length to be looked up while romaji_index < romaji_len && romaji_index + window < romaji_len + 1 { - let romaji_sub_vec = &romaji.chars().collect::>()[romaji_index .. romaji_index + window]; - let romaji_sub_string: String = romaji_sub_vec.into_iter().collect(); + let romaji_sub_vec = &romaji_chars[romaji_index .. romaji_index + window]; + let romaji_sub_string: String = romaji_sub_vec.join(""); let romaji_sub: &str = &romaji_sub_string[..]; - let romaji_sub_vec_last = &romaji.chars().collect::>()[romaji_index + window - 1 .. romaji_index + window]; - let romaji_sub_string_last: String = romaji_sub_vec_last.into_iter().collect(); + let romaji_sub_vec_last = &romaji_chars[romaji_index + window - 1 .. romaji_index + window]; + let romaji_sub_string_last: String = romaji_sub_vec_last.join(""); let romaji_sub_last: &str = &romaji_sub_string_last[..]; if !is_romaji(romaji_sub_last) { - println!("{}",romaji_sub_last); hiragana = format!("{}{}", hiragana, romaji_sub); - // if match_char(&romaji_sub_vec_last[0]) { - // romaji_index += 1; - // } else { - // romaji_index += 2; -// } - romaji_index += 2; + romaji_index += romaji_sub_vec.len(); window = 1; @@ -58,8 +51,8 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { // check for gemminate consonant, if it exists, consume the char // add a "っ" and skip the lookup if window == 1 && romaji_index + 1 < romaji_len && util::is_consonant(romaji_sub) { - let romaji_peek_ahead_vec = &romaji.chars().collect::>()[romaji_index + 1 .. romaji_index + 2]; - let romaji_peek_ahead: String = romaji_peek_ahead_vec.into_iter().collect(); + let romaji_peek_ahead_vec = &romaji_chars[romaji_index + 1 .. romaji_index + 2]; + let romaji_peek_ahead: String = romaji_peek_ahead_vec.join(""); if romaji_sub == romaji_peek_ahead { hiragana = format!("{}{}", hiragana, "っ"); @@ -87,8 +80,8 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { } window += 1; } else { - let romaji_sub_vec_first = &romaji.chars().collect::>()[romaji_index .. romaji_index + 1]; - let romaji_sub_string_first: String = romaji_sub_vec_first.into_iter().collect(); + let romaji_sub_vec_first = &romaji_chars[romaji_index .. romaji_index + 1]; + let romaji_sub_string_first: String = romaji_sub_vec_first.join(""); let romaji_sub_first: &str = &romaji_sub_string_first[..]; hiragana = format!("{}{}", hiragana, romaji_sub_first); @@ -159,12 +152,12 @@ mod tests { use super::*; #[test] fn test_romaji_to_hiragana_safe() { -// assert_eq!(romaji_to_hiragana_safe("arigatou"), "ありがとう".to_string()); -// assert_eq!(romaji_to_hiragana_safe("ar"), "あr".to_string()); -// assert_eq!(romaji_to_hiragana_safe("ari"), "あり".to_string()); + assert_eq!(romaji_to_hiragana_safe("arigatou"), "ありがとう".to_string()); + assert_eq!(romaji_to_hiragana_safe("ar"), "あr".to_string()); + assert_eq!(romaji_to_hiragana_safe("ari"), "あり".to_string()); assert_eq!(romaji_to_hiragana_safe("あri"), "あり".to_string()); -// assert_eq!(romaji_to_hiragana_safe("hri"), "hり".to_string()); -// assert_eq!(romaji_to_hiragana_safe("sme"), "sめ".to_string()); + assert_eq!(romaji_to_hiragana_safe("hri"), "hり".to_string()); + assert_eq!(romaji_to_hiragana_safe("sme"), "sめ".to_string()); assert_eq!(romaji_to_hiragana_safe("wあり"), "wあり".to_string()); } } From 04c6498342be9682362e21ad6dc4c3a253e71d09 Mon Sep 17 00:00:00 2001 From: James Haver Date: Fri, 25 May 2018 22:36:11 +0800 Subject: [PATCH 12/16] Fix n --- Cargo.toml | 1 - src/standard/util.rs | 48 ++++++++++++++++++-------------------------- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f4dfd55..b7faca6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,4 +4,3 @@ version = "0.1.0" authors = ["James Haver II "] [dependencies] -unicode-segmentation = "1.2.1" \ No newline at end of file diff --git a/src/standard/util.rs b/src/standard/util.rs index 4380184..e9b4985 100644 --- a/src/standard/util.rs +++ b/src/standard/util.rs @@ -1,29 +1,14 @@ use standard::tables::*; use util; -extern crate unicode_segmentation; - -use self::unicode_segmentation::UnicodeSegmentation; - pub fn is_romaji(s: &str) -> bool { s.len() == 1 && ["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","-"].contains(&s) } -fn match_char(data: &char) -> bool { - match *data { - '\x01'...'\x08' | - '\u{10FFFE}'...'\u{10FFFF}' => true, - _ => false, - } -} - -fn c_to_string(cs: Vec<(usize,char)>) -> String { - cs.into_iter().map(|(_, c)| c).collect() -} - pub fn romaji_to_hiragana_safe(romaji: &str) -> String { - let romaji_chars = UnicodeSegmentation::graphemes(romaji, true).collect::>(); - let romaji_len = romaji_chars.len(); + let romaji_chars = romaji.chars(); + let romaji_len = romaji_chars.count(); + let romaji_indices = romaji.char_indices().collect::>(); let mut romaji_index = 0; let mut window = 1; // 1 to 3 @@ -32,33 +17,38 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { // convert 1 to 3 ascii chars to a single hirgana // i is starting point, window is length to be looked up while romaji_index < romaji_len && romaji_index + window < romaji_len + 1 { - let romaji_sub_vec = &romaji_chars[romaji_index .. romaji_index + window]; - let romaji_sub_string: String = romaji_sub_vec.join(""); + let romaji_sub_vec = &romaji.chars().collect::>()[romaji_index .. romaji_index + window]; + let romaji_sub_string: String = romaji_sub_vec.into_iter().collect(); let romaji_sub: &str = &romaji_sub_string[..]; - let romaji_sub_vec_last = &romaji_chars[romaji_index + window - 1 .. romaji_index + window]; - let romaji_sub_string_last: String = romaji_sub_vec_last.join(""); + let romaji_sub_vec_last = &romaji.chars().collect::>()[romaji_index + window - 1 .. romaji_index + window]; + let romaji_sub_string_last: String = romaji_sub_vec_last.into_iter().collect(); let romaji_sub_last: &str = &romaji_sub_string_last[..]; if !is_romaji(romaji_sub_last) { + println!("{}",romaji_sub_last); hiragana = format!("{}{}", hiragana, romaji_sub); romaji_index += romaji_sub_vec.len(); window = 1; } else { - // println!("{}", romaji_sub); // check for gemminate consonant, if it exists, consume the char // add a "っ" and skip the lookup - if window == 1 && romaji_index + 1 < romaji_len && util::is_consonant(romaji_sub) { - let romaji_peek_ahead_vec = &romaji_chars[romaji_index + 1 .. romaji_index + 2]; - let romaji_peek_ahead: String = romaji_peek_ahead_vec.join(""); + if window == 1 && romaji_index + 1 < romaji_len && (util::is_consonant(romaji_sub) || romaji_sub == "n") { + let romaji_peek_ahead_vec = &romaji.chars().collect::>()[romaji_index + 1 .. romaji_index + 2]; + let romaji_peek_ahead: String = romaji_peek_ahead_vec.into_iter().collect(); if romaji_sub == romaji_peek_ahead { hiragana = format!("{}{}", hiragana, "っ"); romaji_index += 1; window = 1; continue; + } else if romaji_sub == "n" { + hiragana = format!("{}{}", hiragana, "ん"); + romaji_index += 1; + window = 1; + continue; } } @@ -80,8 +70,8 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { } window += 1; } else { - let romaji_sub_vec_first = &romaji_chars[romaji_index .. romaji_index + 1]; - let romaji_sub_string_first: String = romaji_sub_vec_first.join(""); + let romaji_sub_vec_first = &romaji.chars().collect::>()[romaji_index .. romaji_index + 1]; + let romaji_sub_string_first: String = romaji_sub_vec_first.into_iter().collect(); let romaji_sub_first: &str = &romaji_sub_string_first[..]; hiragana = format!("{}{}", hiragana, romaji_sub_first); @@ -96,7 +86,6 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { hiragana.to_string() } - // this expects everything to be romaji and is destructive pub fn romaji_to_hiragana(romaji: &str) -> String { let romaji_len = romaji.len(); @@ -159,5 +148,6 @@ mod tests { assert_eq!(romaji_to_hiragana_safe("hri"), "hり".to_string()); assert_eq!(romaji_to_hiragana_safe("sme"), "sめ".to_string()); assert_eq!(romaji_to_hiragana_safe("wあり"), "wあり".to_string()); + assert_eq!(romaji_to_hiragana_safe("nt"), "んt".to_string()); } } From d475f436b146e50edd0e89fdb7b7f9f418319a88 Mon Sep 17 00:00:00 2001 From: James Haver Date: Sun, 27 May 2018 02:04:12 +0800 Subject: [PATCH 13/16] More items to Hiragana Table --- src/standard/tables.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/standard/tables.rs b/src/standard/tables.rs index 0118e2b..647ab4b 100644 --- a/src/standard/tables.rs +++ b/src/standard/tables.rs @@ -1,3 +1,3 @@ pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ - ("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsu","つ"),("tu","つ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("wo","を"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず"),("-","ー") -]; + ("-","ー"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("la","ぁ"),("le","ぇ"),("li","ぃ"),("lka","ヵ"),("lke","ヶ"),("lo","ぉ"),("lu","ぅ"),("lya","ゃ"),("lyo","ょ"),("lyu","ゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsu","つ"),("tu","つ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("wo","を"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xka","ヵ"),("xke","ヶ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず") +]; From a34ea85efa0d81ea4d9a8c1fa4e6768b2605d4b4 Mon Sep 17 00:00:00 2001 From: James Haver Date: Sun, 27 May 2018 12:03:39 +0800 Subject: [PATCH 14/16] More fixes to romaji to hiragana --- src/standard/tables.rs | 2 +- src/standard/util.rs | 25 +++++++++++++++---------- src/util.rs | 2 +- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/standard/tables.rs b/src/standard/tables.rs index 647ab4b..667a1bb 100644 --- a/src/standard/tables.rs +++ b/src/standard/tables.rs @@ -1,3 +1,3 @@ pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ - ("-","ー"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("la","ぁ"),("le","ぇ"),("li","ぃ"),("lka","ヵ"),("lke","ヶ"),("lo","ぉ"),("lu","ぅ"),("lya","ゃ"),("lyo","ょ"),("lyu","ゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsu","つ"),("tu","つ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("wo","を"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xka","ヵ"),("xke","ヶ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず") + ("-","ー"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("cye","ちぇ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("la","ぁ"),("le","ぇ"),("li","ぃ"),("lka","ヵ"),("lke","ヶ"),("lo","ぉ"),("lu","ぅ"),("lya","ゃ"),("lyo","ょ"),("lyu","ゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("sya","しゃ"),("sye","しぇ"),("syo","しょ"),("syu","しゅ"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tso","つぉ"),("tsu","つ"),("tu","つ"),("tya","ちゃ"),("tyo","ちょ"),("tyu","ちゅ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("wo","を"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xka","ヵ"),("xke","ヶ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず"),("zya","じゃ"),("zye","じぇ"),("zyo","じょ"),("zyu","じゅ") ]; diff --git a/src/standard/util.rs b/src/standard/util.rs index e9b4985..e2c6c56 100644 --- a/src/standard/util.rs +++ b/src/standard/util.rs @@ -39,16 +39,18 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { let romaji_peek_ahead_vec = &romaji.chars().collect::>()[romaji_index + 1 .. romaji_index + 2]; let romaji_peek_ahead: String = romaji_peek_ahead_vec.into_iter().collect(); - if romaji_sub == romaji_peek_ahead { - hiragana = format!("{}{}", hiragana, "っ"); - romaji_index += 1; - window = 1; - continue; - } else if romaji_sub == "n" { - hiragana = format!("{}{}", hiragana, "ん"); - romaji_index += 1; - window = 1; - continue; + if romaji_peek_ahead != "n" { + if romaji_sub == romaji_peek_ahead { + hiragana = format!("{}{}", hiragana, "っ"); + romaji_index += 1; + window = 1; + continue; + } else if romaji_sub == "n" { + hiragana = format!("{}{}", hiragana, "ん"); + romaji_index += 1; + window = 1; + continue; + } } } @@ -149,5 +151,8 @@ mod tests { assert_eq!(romaji_to_hiragana_safe("sme"), "sめ".to_string()); assert_eq!(romaji_to_hiragana_safe("wあり"), "wあり".to_string()); assert_eq!(romaji_to_hiragana_safe("nt"), "んt".to_string()); + assert_eq!(romaji_to_hiragana_safe("nn"), "ん".to_string()); + assert_eq!(romaji_to_hiragana_safe("tt"), "っt".to_string()); + assert_eq!(romaji_to_hiragana_safe("a--"), "あーー".to_string()); } } diff --git a/src/util.rs b/src/util.rs index 66ea1a8..4b5369b 100644 --- a/src/util.rs +++ b/src/util.rs @@ -65,7 +65,7 @@ pub fn lookups_string(s: &str,x: &str, table: &'static [(&str, &'static [(&str,& } pub fn is_consonant(s: &str) -> bool { - s.len() == 1 && !["a","e","i","n","o","u","y"].contains(&s) + s.len() == 1 && !["-","a","e","i","n","o","u","y"].contains(&s) } pub fn hiragana_has_consonant(s: &str) -> bool { From fbcafb525b07fad9ef78ae66babb86e072a3d6ba Mon Sep 17 00:00:00 2001 From: James Haver Date: Mon, 28 May 2018 02:55:50 +0800 Subject: [PATCH 15/16] Add more things --- src/standard/tables.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/standard/tables.rs b/src/standard/tables.rs index 667a1bb..2a4aac1 100644 --- a/src/standard/tables.rs +++ b/src/standard/tables.rs @@ -1,3 +1,3 @@ pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ - ("-","ー"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("cye","ちぇ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("la","ぁ"),("le","ぇ"),("li","ぃ"),("lka","ヵ"),("lke","ヶ"),("lo","ぉ"),("lu","ぅ"),("lya","ゃ"),("lyo","ょ"),("lyu","ゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("sya","しゃ"),("sye","しぇ"),("syo","しょ"),("syu","しゅ"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tso","つぉ"),("tsu","つ"),("tu","つ"),("tya","ちゃ"),("tyo","ちょ"),("tyu","ちゅ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("wo","を"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xka","ヵ"),("xke","ヶ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず"),("zya","じゃ"),("zye","じぇ"),("zyo","じょ"),("zyu","じゅ") + ("-","ー"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("cye","ちぇ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("la","ぁ"),("le","ぇ"),("li","ぃ"),("lka","ヵ"),("lke","ヶ"),("lo","ぉ"),("lu","ぅ"),("lya","ゃ"),("lyo","ょ"),("lyu","ゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("sya","しゃ"),("sye","しぇ"),("syo","しょ"),("syu","しゅ"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tso","つぉ"),("tsu","つ"),("tu","つ"),("tya","ちゃ"),("tyo","ちょ"),("tyu","ちゅ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("we","ゑ"),("wi","ゐ"),("wo","を"),("wye","ゑ"),("wyi","ゐ"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xka","ヵ"),("xke","ヶ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず"),("zya","じゃ"),("zye","じぇ"),("zyo","じょ"),("zyu","じゅ") ]; From 7eb38fef8e4e86dbb120e98afdb1ca6adb5f1f0d Mon Sep 17 00:00:00 2001 From: James Haver Date: Tue, 29 May 2018 01:05:21 +0800 Subject: [PATCH 16/16] Fix small issues --- src/standard/tables.rs | 2 +- src/standard/util.rs | 5 +++-- src/util.rs | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/standard/tables.rs b/src/standard/tables.rs index 2a4aac1..dffe865 100644 --- a/src/standard/tables.rs +++ b/src/standard/tables.rs @@ -1,3 +1,3 @@ pub const ROMAJI_TO_HIRAGANA_TABLE: &'static [(&str, &str)] = &[ - ("-","ー"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("cye","ちぇ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("la","ぁ"),("le","ぇ"),("li","ぃ"),("lka","ヵ"),("lke","ヶ"),("lo","ぉ"),("lu","ぅ"),("lya","ゃ"),("lyo","ょ"),("lyu","ゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("sya","しゃ"),("sye","しぇ"),("syo","しょ"),("syu","しゅ"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tso","つぉ"),("tsu","つ"),("tu","つ"),("tya","ちゃ"),("tyo","ちょ"),("tyu","ちゅ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("we","ゑ"),("wi","ゐ"),("wo","を"),("wye","ゑ"),("wyi","ゐ"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xka","ヵ"),("xke","ヶ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zo","ぞ"),("zu","ず"),("zya","じゃ"),("zye","じぇ"),("zyo","じょ"),("zyu","じゅ") + ("-","ー"),("a","あ"),("ba","ば"),("be","べ"),("bi","び"),("bo","ぼ"),("bu","ぶ"),("bya","びゃ"),("byo","びょう"),("byu","びゅ"),("cha","ちゃ"),("che","ちぇ"),("chi","ち"),("cho","ちょ"),("chu","ちゅ"),("cye","ちぇ"),("da","だ"),("de","で"),("di","ぢ"),("do","ど"),("du","づ"),("dya","ぢゃ"),("dye","ぢぇ"),("dyo","ぢょ"),("dyu","ぢゅ"),("e","え"),("fa","ふぁ"),("fe","ふぇ"),("fi","ふぃ"),("fo","ふぉ"),("fu","ふ"),("ga","が"),("ge","げ"),("gi","ぎ"),("go","ご"),("gu","ぐ"),("gya","ぎゃ"),("gyo","ぎょ"),("gyu","ぎゅ"),("ha","は"),("he","へ"),("hi","ひ"),("ho","ほ"),("hu","ふ"),("hya","ひゃ"),("hyo","ひょ"),("hyu","ひゅ"),("i","い"),("ja","じゃ"),("je","じぇ"),("ji","じ"),("jo","じょ"),("ju","じゅ"),("ka","か"),("ke","け"),("ki","き"),("ko","こ"),("ku","く"),("kya","きゃ"),("kyo","きょ"),("kyu","きゅ"),("la","ぁ"),("le","ぇ"),("li","ぃ"),("lka","ヵ"),("lke","ヶ"),("lo","ぉ"),("lu","ぅ"),("lya","ゃ"),("lyo","ょ"),("lyu","ゅ"),("ma","ま"),("me","め"),("mi","み"),("mo","も"),("mu","む"),("mya","みゃ"),("myo","みょ"),("myu","みゅ"),("na","な"),("ne","ね"),("ni","に"),("nn","ん"),("no","の"),("nu","ぬ"),("nya","にゃ"),("nyo","にょ"),("nyu","にゅ"),("o","お"),("pa","ぱ"),("pe","ぺ"),("pi","ぴ"),("po","ぽ"),("pu","ぷ"),("pya","ぴゃ"),("pyo","ぴょお"),("pyu","ぴゅ"),("ra","ら"),("re","れ"),("ri","り"),("ro","ろ"),("ru","る"),("rya","りゃ"),("ryo","りょ"),("ryu","りゅ"),("sa","さ"),("se","せ"),("sha","しゃ"),("she","しぇ"),("shi","し"),("sho","しょ"),("shu","しゅ"),("si","し"),("so","そ"),("su","す"),("sya","しゃ"),("sye","しぇ"),("syo","しょ"),("syu","しゅ"),("ta","た"),("te","て"),("ti","ち"),("to","と"),("tsa","つぁ"),("tse","つぇ"),("tsi","つぃ"),("tso","つぉ"),("tsu","つ"),("tu","つ"),("tya","ちゃ"),("tyo","ちょ"),("tyu","ちゅ"),("u","う"),("va","ゔぁ"),("ve","ゔぇ"),("vi","ゔぃ"),("vo","ゔぉ"),("vu","ゔ"),("wa","わ"),("we","ゑ"),("wi","ゐ"),("wo","を"),("wye","ゑ"),("wyi","ゐ"),("xa","ぁ"),("xe","ぇ"),("xi","ぃ"),("xka","ヵ"),("xke","ヶ"),("xo","ぉ"),("xu","ぅ"),("xtsu","っ"),("xtu","っ"),("xwa", "ゎ"),("xya","ゃ"),("xyo","ょ"),("xyu","ゅ"),("ya","や"),("yo","よ"),("yu","ゆ"),("za","ざ"),("ze","ぜ"),("zi","じ"),("zo","ぞ"),("zu","ず"),("zya","じゃ"),("zye","じぇ"),("zyo","じょ"),("zyu","じゅ") ]; diff --git a/src/standard/util.rs b/src/standard/util.rs index e2c6c56..fb8cdc4 100644 --- a/src/standard/util.rs +++ b/src/standard/util.rs @@ -39,13 +39,13 @@ pub fn romaji_to_hiragana_safe(romaji: &str) -> String { let romaji_peek_ahead_vec = &romaji.chars().collect::>()[romaji_index + 1 .. romaji_index + 2]; let romaji_peek_ahead: String = romaji_peek_ahead_vec.into_iter().collect(); - if romaji_peek_ahead != "n" { + if romaji_peek_ahead != "n" && !util::is_vowel(&romaji_peek_ahead) { if romaji_sub == romaji_peek_ahead { hiragana = format!("{}{}", hiragana, "っ"); romaji_index += 1; window = 1; continue; - } else if romaji_sub == "n" { + } else if romaji_sub == "n"{ hiragana = format!("{}{}", hiragana, "ん"); romaji_index += 1; window = 1; @@ -152,6 +152,7 @@ mod tests { assert_eq!(romaji_to_hiragana_safe("wあり"), "wあり".to_string()); assert_eq!(romaji_to_hiragana_safe("nt"), "んt".to_string()); assert_eq!(romaji_to_hiragana_safe("nn"), "ん".to_string()); + assert_eq!(romaji_to_hiragana_safe("na"), "な".to_string()); assert_eq!(romaji_to_hiragana_safe("tt"), "っt".to_string()); assert_eq!(romaji_to_hiragana_safe("a--"), "あーー".to_string()); } diff --git a/src/util.rs b/src/util.rs index 4b5369b..7b17eac 100644 --- a/src/util.rs +++ b/src/util.rs @@ -68,6 +68,10 @@ pub fn is_consonant(s: &str) -> bool { s.len() == 1 && !["-","a","e","i","n","o","u","y"].contains(&s) } +pub fn is_vowel(s: &str) -> bool { + s.len() == 1 && ["a","e","i","o","u"].contains(&s) +} + pub fn hiragana_has_consonant(s: &str) -> bool { s.len() == 1 && !["a","e","i","n","o","u","y"].contains(&s) }