diff --git a/mozuku-lsp/CMakeLists.txt b/mozuku-lsp/CMakeLists.txt index 1a9c4a2..f202851 100644 --- a/mozuku-lsp/CMakeLists.txt +++ b/mozuku-lsp/CMakeLists.txt @@ -69,6 +69,13 @@ function(add_tree_sitter_language target_name source_root) endfunction() function(add_tree_sitter_language_with_fallback target_name package_name source_root) + # 可能ならthird-partyのソースを静的リンクする + if(EXISTS "${source_root}/src/parser.c") + message(STATUS "third-partyから${package_name}をビルドします") + add_tree_sitter_language(${target_name} "${source_root}") + return() + endif() + file(GLOB NIX_PARSER_PATHS "/nix/store/*-${package_name}-grammar-*/parser") if(NIX_PARSER_PATHS) list(GET NIX_PARSER_PATHS 0 NIX_PARSER_PATH) @@ -90,13 +97,8 @@ function(add_tree_sitter_language_with_fallback target_name package_name source_ add_library(${target_name} INTERFACE IMPORTED GLOBAL) target_link_libraries(${target_name} INTERFACE ${${target_name}_SYSTEM_LIB}) else() - if(EXISTS "${source_root}/src/parser.c") - message(STATUS "third-partyから${package_name}をビルドします") - add_tree_sitter_language(${target_name} "${source_root}") - else() - message(WARNING "${package_name}が見つかりません。システムにもthird-partyにもライブラリが存在しません。") - add_library(${target_name} INTERFACE IMPORTED GLOBAL) - endif() + message(WARNING "${package_name}が見つかりません。システムにもthird-partyにもライブラリが存在しません。") + add_library(${target_name} INTERFACE IMPORTED GLOBAL) endif() endfunction() diff --git a/mozuku-lsp/include/text_processor.hpp b/mozuku-lsp/include/text_processor.hpp index 7ed8b74..a197e27 100644 --- a/mozuku-lsp/include/text_processor.hpp +++ b/mozuku-lsp/include/text_processor.hpp @@ -14,6 +14,8 @@ class TextProcessor { static std::vector splitIntoSentences(const std::string &text); + static double calculateJapaneseRatio(const std::string &text); + static bool isJapanesePunctuation(const std::string &text, size_t pos); static size_t skipWhitespace(const std::string &text, size_t pos); @@ -21,6 +23,10 @@ class TextProcessor { private: static bool 
isValidUtf8Sequence(const std::string &input, size_t pos, size_t seqLen); + static uint32_t decodeCodepoint(const std::string &text, size_t pos, + size_t seqLen); + static bool isWhitespaceCodepoint(uint32_t codepoint); + static bool isJapaneseCodepoint(uint32_t codepoint); }; } // namespace text diff --git a/mozuku-lsp/src/analyzer.cpp b/mozuku-lsp/src/analyzer.cpp index 112fc80..4b80368 100644 --- a/mozuku-lsp/src/analyzer.cpp +++ b/mozuku-lsp/src/analyzer.cpp @@ -23,9 +23,7 @@ static bool isDebugEnabled() { return debug; } -Analyzer::Analyzer() - : mecab_manager_(std::make_unique(true)) { - +Analyzer::Analyzer() { if (isDebugEnabled()) { std::cerr << "[DEBUG] Analyzer created" << std::endl; } @@ -35,6 +33,8 @@ Analyzer::~Analyzer() = default; bool Analyzer::initialize(const MoZukuConfig &config) { config_ = config; + mecab_manager_ = + std::make_unique(config.analysis.enableCaboCha); if (isDebugEnabled()) { std::cerr << "[DEBUG] Initializing analyzer with config" << std::endl; @@ -73,6 +73,16 @@ std::vector Analyzer::analyzeText(const std::string &text) { } std::string cleanText = text::TextProcessor::sanitizeUTF8(text); + double japaneseRatio = text::TextProcessor::calculateJapaneseRatio(cleanText); + if (config_.analysis.minJapaneseRatio > 0.0 && + japaneseRatio < config_.analysis.minJapaneseRatio) { + if (isDebugEnabled()) { + std::cerr << "[DEBUG] Skipping analysis due to low Japanese ratio: " + << japaneseRatio << " < " << config_.analysis.minJapaneseRatio + << std::endl; + } + return tokens; + } std::string systemText = encoding::utf8ToSystem(cleanText, system_charset_); @@ -153,6 +163,18 @@ std::vector Analyzer::checkGrammar(const std::string &text) { return diagnostics; } + std::string cleanText = text::TextProcessor::sanitizeUTF8(text); + double japaneseRatio = text::TextProcessor::calculateJapaneseRatio(cleanText); + if (config_.analysis.minJapaneseRatio > 0.0 && + japaneseRatio < config_.analysis.minJapaneseRatio) { + if (isDebugEnabled()) { + 
std::cerr << "[DEBUG] Skipping grammar check due to low Japanese ratio: " + << japaneseRatio << " < " << config_.analysis.minJapaneseRatio + << std::endl; + } + return diagnostics; + } + if (isDebugEnabled()) { std::cerr << "[DEBUG] Starting grammar check" << std::endl; } diff --git a/mozuku-lsp/src/lsp.cpp b/mozuku-lsp/src/lsp.cpp index f483e70..6e0ea84 100644 --- a/mozuku-lsp/src/lsp.cpp +++ b/mozuku-lsp/src/lsp.cpp @@ -33,6 +33,25 @@ struct LocalByteRange { size_t endByte{0}; }; +bool readBoolOption(const json &obj, const char *key, bool &out) { + if (!obj.contains(key)) { + return false; + } + + const auto &value = obj[key]; + if (value.is_boolean()) { + out = value.get(); + return true; + } + + if (value.is_number_integer()) { + out = value.get() != 0; + return true; + } + + return false; +} + bool isEscaped(const std::string &text, size_t pos) { size_t count = 0; while (pos > count && text[pos - count - 1] == '\\') { @@ -411,6 +430,9 @@ json LSPServer::onInitialize(const json &id, const json ¶ms) { // initializationOptionsから設定を抽出 if (params.contains("initializationOptions")) { auto opts = params["initializationOptions"]; + if (opts.contains("mozuku") && opts["mozuku"].is_object()) { + opts = opts["mozuku"]; + } // MeCab設定 if (opts.contains("mecab")) { @@ -426,14 +448,8 @@ json LSPServer::onInitialize(const json &id, const json ¶ms) { // 解析設定 if (opts.contains("analysis")) { auto analysis = opts["analysis"]; - if (analysis.contains("enableCaboCha") && - analysis["enableCaboCha"].is_boolean()) { - config_.analysis.enableCaboCha = analysis["enableCaboCha"]; - } - if (analysis.contains("grammarCheck") && - analysis["grammarCheck"].is_boolean()) { - config_.analysis.grammarCheck = analysis["grammarCheck"]; - } + readBoolOption(analysis, "enableCaboCha", config_.analysis.enableCaboCha); + readBoolOption(analysis, "grammarCheck", config_.analysis.grammarCheck); if (analysis.contains("minJapaneseRatio") && analysis["minJapaneseRatio"].is_number()) { 
config_.analysis.minJapaneseRatio = analysis["minJapaneseRatio"]; @@ -446,63 +462,33 @@ json LSPServer::onInitialize(const json &id, const json ¶ms) { // 警告レベル設定 if (analysis.contains("warnings") && analysis["warnings"].is_object()) { auto warnings = analysis["warnings"]; - if (warnings.contains("particleDuplicate") && - warnings["particleDuplicate"].is_boolean()) { - config_.analysis.warnings.particleDuplicate = - warnings["particleDuplicate"]; - } - if (warnings.contains("particleSequence") && - warnings["particleSequence"].is_boolean()) { - config_.analysis.warnings.particleSequence = - warnings["particleSequence"]; - } - if (warnings.contains("particleMismatch") && - warnings["particleMismatch"].is_boolean()) { - config_.analysis.warnings.particleMismatch = - warnings["particleMismatch"]; - } - if (warnings.contains("sentenceStructure") && - warnings["sentenceStructure"].is_boolean()) { - config_.analysis.warnings.sentenceStructure = - warnings["sentenceStructure"]; - } - if (warnings.contains("styleConsistency") && - warnings["styleConsistency"].is_boolean()) { - config_.analysis.warnings.styleConsistency = - warnings["styleConsistency"]; - } - if (warnings.contains("redundancy") && - warnings["redundancy"].is_boolean()) { - config_.analysis.warnings.redundancy = warnings["redundancy"]; - } + readBoolOption(warnings, "particleDuplicate", + config_.analysis.warnings.particleDuplicate); + readBoolOption(warnings, "particleSequence", + config_.analysis.warnings.particleSequence); + readBoolOption(warnings, "particleMismatch", + config_.analysis.warnings.particleMismatch); + readBoolOption(warnings, "sentenceStructure", + config_.analysis.warnings.sentenceStructure); + readBoolOption(warnings, "styleConsistency", + config_.analysis.warnings.styleConsistency); + readBoolOption(warnings, "redundancy", + config_.analysis.warnings.redundancy); } // ルールの有効/無効設定 if (analysis.contains("rules") && analysis["rules"].is_object()) { auto rules = analysis["rules"]; - if 
(rules.contains("commaLimit") && rules["commaLimit"].is_boolean()) { - config_.analysis.rules.commaLimit = rules["commaLimit"]; - } - if (rules.contains("adversativeGa") && - rules["adversativeGa"].is_boolean()) { - config_.analysis.rules.adversativeGa = rules["adversativeGa"]; - } - if (rules.contains("duplicateParticleSurface") && - rules["duplicateParticleSurface"].is_boolean()) { - config_.analysis.rules.duplicateParticleSurface = - rules["duplicateParticleSurface"]; - } - if (rules.contains("adjacentParticles") && - rules["adjacentParticles"].is_boolean()) { - config_.analysis.rules.adjacentParticles = rules["adjacentParticles"]; - } - if (rules.contains("conjunctionRepeat") && - rules["conjunctionRepeat"].is_boolean()) { - config_.analysis.rules.conjunctionRepeat = rules["conjunctionRepeat"]; - } - if (rules.contains("raDropping") && rules["raDropping"].is_boolean()) { - config_.analysis.rules.raDropping = rules["raDropping"]; - } + readBoolOption(rules, "commaLimit", config_.analysis.rules.commaLimit); + readBoolOption(rules, "adversativeGa", + config_.analysis.rules.adversativeGa); + readBoolOption(rules, "duplicateParticleSurface", + config_.analysis.rules.duplicateParticleSurface); + readBoolOption(rules, "adjacentParticles", + config_.analysis.rules.adjacentParticles); + readBoolOption(rules, "conjunctionRepeat", + config_.analysis.rules.conjunctionRepeat); + readBoolOption(rules, "raDropping", config_.analysis.rules.raDropping); if (rules.contains("commaLimitMax") && rules["commaLimitMax"].is_number_integer()) { config_.analysis.rules.commaLimitMax = rules["commaLimitMax"]; diff --git a/mozuku-lsp/src/text_processor.cpp b/mozuku-lsp/src/text_processor.cpp index b40673f..2fa03b9 100644 --- a/mozuku-lsp/src/text_processor.cpp +++ b/mozuku-lsp/src/text_processor.cpp @@ -186,6 +186,52 @@ TextProcessor::splitIntoSentences(const std::string &text) { return sentences; } +double TextProcessor::calculateJapaneseRatio(const std::string &text) { + if 
(text.empty()) { + return 0.0; + } + + size_t japaneseCount = 0; + size_t visibleCount = 0; + + for (size_t i = 0; i < text.size(); ++i) { + unsigned char c = static_cast(text[i]); + size_t seqLen = 1; + + if (c < 0x80) { + seqLen = 1; + } else if ((c & 0xE0) == 0xC0) { + seqLen = 2; + } else if ((c & 0xF0) == 0xE0) { + seqLen = 3; + } else if ((c & 0xF8) == 0xF0) { + seqLen = 4; + } else { + continue; + } + + if (!isValidUtf8Sequence(text, i, seqLen)) { + continue; + } + + uint32_t codepoint = decodeCodepoint(text, i, seqLen); + if (!isWhitespaceCodepoint(codepoint)) { + ++visibleCount; + if (isJapaneseCodepoint(codepoint)) { + ++japaneseCount; + } + } + + i += seqLen - 1; + } + + if (visibleCount == 0) { + return 0.0; + } + + return static_cast(japaneseCount) / static_cast(visibleCount); +} + bool TextProcessor::isJapanesePunctuation(const std::string &text, size_t pos) { if (pos + 2 >= text.size()) return false; @@ -237,5 +283,47 @@ bool TextProcessor::isValidUtf8Sequence(const std::string &input, size_t pos, return true; } +uint32_t TextProcessor::decodeCodepoint(const std::string &text, size_t pos, + size_t seqLen) { + const unsigned char c0 = static_cast(text[pos]); + if (seqLen == 1) { + return c0; + } + + const unsigned char c1 = static_cast(text[pos + 1]); + if (seqLen == 2) { + return (static_cast(c0 & 0x1F) << 6) | + static_cast(c1 & 0x3F); + } + + const unsigned char c2 = static_cast(text[pos + 2]); + if (seqLen == 3) { + return (static_cast(c0 & 0x0F) << 12) | + (static_cast(c1 & 0x3F) << 6) | + static_cast(c2 & 0x3F); + } + + const unsigned char c3 = static_cast(text[pos + 3]); + return (static_cast(c0 & 0x07) << 18) | + (static_cast(c1 & 0x3F) << 12) | + (static_cast(c2 & 0x3F) << 6) | + static_cast(c3 & 0x3F); +} + +bool TextProcessor::isWhitespaceCodepoint(uint32_t codepoint) { + return codepoint == 0x09 || codepoint == 0x0A || codepoint == 0x0D || + codepoint == 0x20 || codepoint == 0x3000; +} + +bool TextProcessor::isJapaneseCodepoint(uint32_t 
codepoint) { + return (codepoint >= 0x3040 && codepoint <= 0x309F) || + (codepoint >= 0x30A0 && codepoint <= 0x30FF) || + (codepoint >= 0x31F0 && codepoint <= 0x31FF) || + (codepoint >= 0x3400 && codepoint <= 0x4DBF) || + (codepoint >= 0x4E00 && codepoint <= 0x9FFF) || + (codepoint >= 0x3000 && codepoint <= 0x303F) || + (codepoint >= 0xFF66 && codepoint <= 0xFF9F); +} + } // namespace text } // namespace MoZuku diff --git a/vim-mozuku/README.md b/vim-mozuku/README.md new file mode 100644 index 0000000..d2f6af6 --- /dev/null +++ b/vim-mozuku/README.md @@ -0,0 +1,3 @@ +# vim-mozuku + +MoZuku LSP を Vim/Neovim で使うための軽量プラグインです。`vim-mozuku/` を runtimepath に追加することで有効化できます。 diff --git a/vim-mozuku/autoload/mozuku.vim b/vim-mozuku/autoload/mozuku.vim new file mode 100644 index 0000000..8bc6195 --- /dev/null +++ b/vim-mozuku/autoload/mozuku.vim @@ -0,0 +1,487 @@ +let s:semantic_types = [ + \ 'noun', + \ 'verb', + \ 'adjective', + \ 'adverb', + \ 'particle', + \ 'aux', + \ 'conjunction', + \ 'symbol', + \ 'interj', + \ 'prefix', + \ 'suffix', + \ 'unknown', + \ ] + +function! mozuku#init() abort + call mozuku#ensure_config() + call mozuku#ensure_highlights() + + if has('nvim') + call mozuku#nvim_setup() + elseif exists('*lsp#register_server') + call mozuku#vim_lsp_setup() + else + if !exists('g:mozuku_warned') + echohl WarningMsg + echom '[mozuku] Neovim 0.5+ または vim-lsp が必要です。Vim組み込みLSPへの対応は未確定です。' + echohl None + let g:mozuku_warned = 1 + endif + endif +endfunction + +function! 
" Fill in every g:mozuku_* option the user has not set.
" Values that already exist are never overwritten; the two dictionaries
" g:mozuku_warnings and g:mozuku_rules are completed key by key.
function! mozuku#ensure_config() abort
  let l:scalars = {
        \ 'mozuku_server_path': 'mozuku-lsp',
        \ 'mozuku_mecab_dicdir': '',
        \ 'mozuku_mecab_charset': 'UTF-8',
        \ 'mozuku_analysis_enable_cabocha': 1,
        \ 'mozuku_analysis_grammar_check': 1,
        \ 'mozuku_analysis_min_japanese_ratio': 0.1,
        \ 'mozuku_analysis_warning_min_severity': 2,
        \ }
  for l:name in keys(l:scalars)
    if !has_key(g:, l:name)
      let g:[l:name] = l:scalars[l:name]
    endif
  endfor

  " Warning categories (on/off flags).
  let g:mozuku_warnings = get(g:, 'mozuku_warnings', {})
  for [l:key, l:flag] in [
        \ ['particleDuplicate', 1],
        \ ['particleSequence', 1],
        \ ['particleMismatch', 1],
        \ ['sentenceStructure', 0],
        \ ['styleConsistency', 0],
        \ ['redundancy', 0],
        \ ]
    call s:default_bool(g:mozuku_warnings, l:key, l:flag)
  endfor

  " Individual rules: on/off flags first, numeric limits second.
  let g:mozuku_rules = get(g:, 'mozuku_rules', {})
  for l:key in [
        \ 'commaLimit',
        \ 'adversativeGa',
        \ 'duplicateParticleSurface',
        \ 'adjacentParticles',
        \ 'conjunctionRepeat',
        \ 'raDropping',
        \ ]
    call s:default_bool(g:mozuku_rules, l:key, 1)
  endfor
  for [l:key, l:limit] in [
        \ ['commaLimitMax', 3],
        \ ['adversativeGaMax', 1],
        \ ['duplicateParticleSurfaceMaxRepeat', 1],
        \ ['adjacentParticlesMaxRepeat', 1],
        \ ['conjunctionRepeatMax', 1],
        \ ]
    call s:default_num(g:mozuku_rules, l:key, l:limit)
  endfor
endfunction
" Store a:value under a:key in a:dict unless the key is already present.
" Used for on/off style options.
function! s:default_bool(dict, key, value) abort
  " 'keep' leaves an existing entry untouched.
  call extend(a:dict, {a:key : a:value}, 'keep')
endfunction

" Store a:value under a:key in a:dict unless the key is already present.
" Used for numeric options.
function! s:default_num(dict, key, value) abort
  call extend(a:dict, {a:key : a:value}, 'keep')
endfunction
" Build the filetype allowlist for vim-lsp: a copy of g:mozuku_filetypes with
" 'text' and 'markdown' appended when they are not listed yet.
" g:mozuku_filetypes itself is not modified.
function! mozuku#vim_allowlist() abort
  let l:result = copy(g:mozuku_filetypes)
  let l:missing = filter(['text', 'markdown'], 'index(l:result, v:val) < 0')
  return extend(l:result, l:missing)
endfunction
" Return the server command as a List.
" g:mozuku_server_path may be a List (returned as is) or a single value,
" which is wrapped in a one-element List.
function! mozuku#server_cmd() abort
  let l:is_list = type(g:mozuku_server_path) == v:t_list
  return l:is_list ? g:mozuku_server_path : [g:mozuku_server_path]
endfunction
" Remember the latest highlight payload of a:kind ('comment', 'content' or
" 'semantic') for its document and re-apply the highlights when that
" document maps to a buffer.
" Payloads that are not dictionaries or that lack a 'uri' key are ignored.
function! mozuku#vim_store_and_apply(kind, payload) abort
  if type(a:payload) != v:t_dict || !has_key(a:payload, 'uri')
    return
  endif
  let l:doc = a:payload['uri']

  " g:mozuku_state is a two-level dictionary: kind -> uri -> payload.
  let g:mozuku_state = get(g:, 'mozuku_state', {})
  let l:bucket = get(g:mozuku_state, a:kind, {})
  let l:bucket[l:doc] = a:payload
  let g:mozuku_state[a:kind] = l:bucket

  let l:target = mozuku#uri_to_bufnr(l:doc)
  if l:target > 0
    call mozuku#vim_apply_from_state(l:target)
  endif
endfunction
" Replace the semantic-token text properties of buffer a:bufnr with the
" tokens found in a:payload['tokens'].
"
" Each token needs a 'range' and a 'type'. The buffer variable
" 'mozuku_has_semantic' is reset to 0 and becomes 1 as soon as one token has
" been applied.
function! mozuku#vim_apply_semantic(bufnr, payload) abort
  call mozuku#vim_clear_semantic(a:bufnr)
  call setbufvar(a:bufnr, 'mozuku_has_semantic', 0)
  if !has_key(a:payload, 'tokens')
    return
  endif
  for l:token in a:payload['tokens']
    if !has_key(l:token, 'range') || !has_key(l:token, 'type')
      continue
    endif
    " Only the names in s:semantic_types have a registered property type;
    " prop_add() fails for any other name, so map those to 'unknown'.
    let l:kind = l:token['type']
    if type(l:kind) != v:t_string || index(s:semantic_types, l:kind) < 0
      let l:kind = 'unknown'
    endif
    let l:group = 'Mozuku' . s:capitalize(l:kind)
    call s:vim_apply_ranges(a:bufnr, [l:token['range']], l:group)
    call setbufvar(a:bufnr, 'mozuku_has_semantic', 1)
  endfor
endfunction
" Convert an LSP column, counted in UTF-16 code units, into a 0-based byte
" offset within a:line.
"
" a:line       text of the line
" a:utf16_col  column in UTF-16 code units
"
" Returns 0 for a column <= 0 and strlen(a:line) when the column lies at or
" beyond the end of the line.
function! mozuku#utf16_to_byte(line, utf16_col) abort
  if a:utf16_col <= 0
    return 0
  endif
  let l:chars = strchars(a:line)
  let l:acc = 0
  for l:i in range(0, l:chars - 1)
    if l:acc >= a:utf16_col
      " strchars()/strcharpart() count composing characters separately, so
      " the index must be converted with byteidxcomp(); byteidx() folds
      " composing characters into their base character.
      return byteidxcomp(a:line, l:i)
    endif
    let l:code = char2nr(strcharpart(a:line, l:i, 1))
    " Code points above U+FFFF occupy a surrogate pair in UTF-16.
    let l:acc += l:code > 0xFFFF ? 2 : 1
  endfor
  return strlen(a:line)
endfunction
--- Text of one 0-based buffer line, or '' when the row does not exist.
local function buf_line(bufnr, line)
  -- Non-strict indexing: an out-of-range row gives an empty list instead of
  -- raising, so the '' fallback below is actually reachable.
  local lines = vim.api.nvim_buf_get_lines(bufnr, line, line + 1, false)
  return lines[1] or ''
end

--- Place one highlight extmark per LSP range.
-- @param bufnr    buffer handle
-- @param ranges   list of { start = {line, character}, ['end'] = {line, character} }
--                 with UTF-16 character offsets
-- @param ns       extmark namespace id
-- @param group    highlight group name
-- @param priority extmark priority
-- Ranges without start/end, or whose start row is outside the buffer (for
-- example because the text changed after the server computed them), are
-- skipped; an end row past the last line is clamped to the end of the buffer.
local function apply_ranges(bufnr, ranges, ns, group, priority)
  local last_row = vim.api.nvim_buf_line_count(bufnr) - 1
  for _, range in ipairs(ranges) do
    local s = range.start
    local e = range['end']
    local usable = s and e
      and type(s.line) == 'number' and type(e.line) == 'number'
      and s.line >= 0 and s.line <= last_row and e.line >= 0
    if usable then
      local start_col = to_byte_col(buf_line(bufnr, s.line), s.character)
      local end_row = e.line
      local end_col
      if end_row > last_row then
        end_row = last_row
        end_col = #buf_line(bufnr, last_row)
      else
        end_col = to_byte_col(buf_line(bufnr, end_row), e.character)
      end
      vim.api.nvim_buf_set_extmark(bufnr, ns, s.line, start_col, {
        end_line = end_row,
        end_col = end_col,
        hl_group = group,
        priority = priority,
      })
    end
  end
end
--- Start (or reuse) the MoZuku language server for a buffer.
-- @param bufnr  buffer handle to attach
-- @param config table with optional `server_path` (string or list of
--               command words) and optional `init_options`
-- Does nothing when the buffer is already marked as attached. The root
-- directory is the parent of the nearest `.git` entry found upward from the
-- file, or the file's own directory when none is found.
function M.start(bufnr, config)
  if vim.b[bufnr].mozuku_attached then
    return
  end

  local file = vim.api.nvim_buf_get_name(bufnr)
  local parent = vim.fs.dirname(file)

  local markers = vim.fs.find('.git', { path = parent, upward = true })
  local root = markers and markers[1] and vim.fs.dirname(markers[1]) or nil
  if not root or root == '' then
    root = parent
  end

  local server = config.server_path or 'mozuku-lsp'
  local cmd = type(server) == 'table' and server or { server }

  local client_config = {
    name = 'mozuku',
    cmd = cmd,
    root_dir = root,
    init_options = config.init_options or {},
    get_language_id = language_id_for,
    on_attach = function(_, attached_buf)
      vim.b[attached_buf].mozuku_attached = true
    end,
  }
  vim.lsp.start(client_config, { bufnr = bufnr })
end
autocmd! + execute 'autocmd FileType ' . join(g:mozuku_filetypes, ',') . ' call mozuku#maybe_start(0)' + autocmd BufEnter *.ja.txt,*.ja.md call mozuku#maybe_start(1) + autocmd BufEnter * call mozuku#apply_current() +augroup END + +command! MozukuStart call mozuku#maybe_start(1) +command! MozukuRefresh call mozuku#apply_current()