-
Notifications
You must be signed in to change notification settings - Fork 107
Closed
Description
不确定是不是该提交 PR，我从 unicode.org 下载了目前最新的 17.0.0 Unihan.zip。
代码
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let context = reqwest::get("http://www.unicode.org/Public/17.0.0/ucd/Unihan.zip").await?
.error_for_status()?.bytes().await?;
let mut archive = zip::ZipArchive::new(std::io::Cursor::new(context))?;
let entry= archive.by_name("Unihan_Readings.txt")?;
let context = std::io::read_to_string(entry)?;
let mut pinyin = indexmap::IndexMap::<_, Vec<_>>::new();
for line in context.lines() {
if line.starts_with('#') || line.is_empty() {
continue;
}
let mut iter = line.splitn(3, '\t');
let id = iter.next().unwrap();
let kind = iter.next().unwrap();
let extra = iter.next().unwrap();
if kind == "kMandarin" {
match pinyin.entry(id) {
indexmap::map::Entry::Occupied(mut entry) => {
let vec = entry.get_mut();
vec.insert(0, extra);
let old = vec.iter().enumerate().skip(1).find(|(_, pinyin)| **pinyin == extra);
if let Some((index, _)) = old {
vec.remove(index);
}
},
indexmap::map::Entry::Vacant(entry) => {
entry.insert(Vec::new()).push(extra);
},
}
}
if kind == "kHanyuPinyin" {
let multi = extra.split(':').last().unwrap().split(',');
pinyin.entry(id).or_default().extend(multi);
}
}
let mut file = std::fs::File::create("pinyin.txt")?;
for (id, pinyin) in pinyin {
let hex = id.trim_start_matches("U+");
let code_point = u32::from_str_radix(hex, 16)?;
let ch = char::from_u32(code_point).unwrap();
use std::io::Write;
writeln!(file, "{id}: {} # {ch}", pinyin.join(","))?;
}
Ok(())
}Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels