Skip to content

更新pinyin.txt #200

@xuxiaocheng0201

Description

@xuxiaocheng0201

不确定是不是该提交 PR,我从 unicode.org 下载了目前最新的 17.0.0 Unihan.zip

代码
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let context = reqwest::get("http://www.unicode.org/Public/17.0.0/ucd/Unihan.zip").await?
        .error_for_status()?.bytes().await?;
    let mut archive = zip::ZipArchive::new(std::io::Cursor::new(context))?;
    let entry= archive.by_name("Unihan_Readings.txt")?;
    let context = std::io::read_to_string(entry)?;
    let mut pinyin = indexmap::IndexMap::<_, Vec<_>>::new();
    for line in context.lines() {
        if line.starts_with('#') || line.is_empty() {
            continue;
        }
        let mut iter = line.splitn(3, '\t');
        let id = iter.next().unwrap();
        let kind = iter.next().unwrap();
        let extra = iter.next().unwrap();
        if kind == "kMandarin" {
            match pinyin.entry(id) {
                indexmap::map::Entry::Occupied(mut entry) => {
                    let vec = entry.get_mut();
                    vec.insert(0, extra);
                    let old = vec.iter().enumerate().skip(1).find(|(_, pinyin)| **pinyin == extra);
                    if let Some((index, _)) = old {
                        vec.remove(index);
                    }
                },
                indexmap::map::Entry::Vacant(entry) => {
                    entry.insert(Vec::new()).push(extra);
                },
            }
        }
        if kind == "kHanyuPinyin" {
            let multi = extra.split(':').last().unwrap().split(',');
            pinyin.entry(id).or_default().extend(multi);
        }
    }
    let mut file = std::fs::File::create("pinyin.txt")?;
    for (id, pinyin) in pinyin {
        let hex = id.trim_start_matches("U+");
        let code_point = u32::from_str_radix(hex, 16)?;
        let ch = char::from_u32(code_point).unwrap();
        use std::io::Write;
        writeln!(file, "{id}: {} # {ch}", pinyin.join(","))?;
    }
    Ok(())
}
原仓库(https://github.com/mozillazg/pinyin-data)的内容有些多,不确定哪些文件需要更改

pinyin.txt

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions