diff --git a/mozuku-lsp/CMakeLists.txt b/mozuku-lsp/CMakeLists.txt
index aebda49..68e42f6 100644
--- a/mozuku-lsp/CMakeLists.txt
+++ b/mozuku-lsp/CMakeLists.txt
@@ -126,6 +126,9 @@ set(MOZUKU_SOURCES
   src/grammar_checker.cpp
   src/wikipedia.cpp
   src/comment_extractor.cpp
+  src/document_preprocessor.cpp
+  src/presenter.cpp
+  src/tree_sitter_document.cpp
 )
 
 add_executable(mozuku-lsp ${MOZUKU_SOURCES})
diff --git a/mozuku-lsp/include/analyzer.hpp b/mozuku-lsp/include/analyzer.hpp
index 4c02f6f..4146b06 100644
--- a/mozuku-lsp/include/analyzer.hpp
+++ b/mozuku-lsp/include/analyzer.hpp
@@ -1,99 +1,12 @@
 #pragma once
 
+#include "mozuku/core/config.hpp"
+#include "mozuku/core/types.hpp"
+
 #include <memory>
 #include <string>
 #include <vector>
 
-struct TokenData;
-struct Diagnostic;
-
-struct DetailedPOS {
-  std::string mainPOS;       // 主品詞 (名詞, 動詞, 助詞...)
-  std::string subPOS1;       // 品詞細分類1 (格助詞, 副助詞, 係助詞...)
-  std::string subPOS2;       // 品詞細分類2
-  std::string subPOS3;       // 品詞細分類3
-  std::string inflection;    // 活用型
-  std::string conjugation;   // 活用形
-  std::string baseForm;      // 原形
-  std::string reading;       // 読み
-  std::string pronunciation; // 発音
-
-  bool isParticle() const { return mainPOS == "助詞"; }
-  bool isVerb() const { return mainPOS == "動詞"; }
-  bool isNoun() const { return mainPOS == "名詞"; }
-};
-
-// Information about a particle (助詞) token
-struct ParticleInfo {
-  std::string surface;  // 表層形
-  std::string function; // 格助詞, 副助詞, 係助詞, 接続助詞
-  std::string role;     // より詳細な役割
-  size_t position;      // 文中の位置 (バイト単位)
-  int tokenIndex;       // トークン配列内のインデックス
-  int sentenceId;       // 所属する文のID
-};
-
-// Sentence boundary information
-struct SentenceBoundary {
-  size_t start;     // 文の開始位置 (バイト単位)
-  size_t end;       // 文の終了位置 (バイト単位)
-  int sentenceId;   // 文のID
-  std::string text; // 文の内容
-};
-
-// Dependency parsing information from CaboCha
-struct DependencyInfo {
-  int chunkId;      // チャンクID
-  int headId;       // 係り先チャンクID
-  double score;     // 係り受けスコア
-  std::string text; // チャンクのテキスト
-};
-
-// Configuration structures (shared between LSP server and analyzer)
-struct MeCabConfig {
-  std::string dicPath;           // Dictionary directory path
-  std::string charset = "UTF-8"; // Character encoding
-};
-
-struct AnalysisConfig {
-  bool enableCaboCha = true; // Enable CaboCha dependency parsing
-  bool grammarCheck = true;  // Enable grammar diagnostics
-  double minJapaneseRatio =
-      0.1; // Minimum Japanese character ratio for analysis
-
-  struct RuleToggles {
-    bool commaLimit = true;
-    bool adversativeGa = true;
-    bool duplicateParticleSurface = true;
-    bool adjacentParticles = true;
-    bool conjunctionRepeat = true;
-    bool raDropping = true;
-    int commaLimitMax = 3;
-    int adversativeGaMax = 1;
-    int duplicateParticleSurfaceMaxRepeat = 1;
-    int adjacentParticlesMaxRepeat = 1;
-    int conjunctionRepeatMax = 1;
-  } rules;
-
-  // Enhanced grammar warning settings
-  struct WarningLevels {
-    bool particleDuplicate = true;  // 二重助詞警告
-    bool particleSequence = true;   // 不適切助詞連続
-    bool particleMismatch = true;   // 動詞-助詞不整合
-    bool sentenceStructure = false; // 文構造問題 (実験的)
-    bool styleConsistency = false;  // 文体混在 (実験的)
-    bool redundancy = false;        // 冗長表現 (実験的)
-  } warnings;
-
-  int warningMinSeverity =
-      2; // 最小警告レベル (1=Error, 2=Warning, 3=Info, 4=Hint)
-};
-
-struct MoZukuConfig {
-  MeCabConfig mecab;
-  AnalysisConfig analysis;
-};
-
 void analyzeText(const std::string &text, std::vector<TokenData> &tokens,
                  std::vector<Diagnostic> &diags,
                  const MoZukuConfig *config = nullptr);
@@ -101,15 +14,6 @@ void analyzeText(const std::string &text, std::vector<TokenData> &tokens,
 void performGrammarDiagnostics(const std::string &text,
                                std::vector<Diagnostic> &diags);
 
-size_t computeByteOffset(const std::string &text, int line, int character);
-
-namespace MoZukuModifiers {
-static constexpr unsigned Proper = 1u << 0;  // "proper"
-static constexpr unsigned Numeric = 1u << 1; // "numeric"
-static constexpr unsigned Kana = 1u << 2;    // "kana"
-static constexpr unsigned Kanji = 1u << 3;   // "kanji"
-} // namespace MoZukuModifiers
-
 namespace MoZuku {
 
 namespace mecab {
@@ -132,6 +36,16 @@ class Analyzer {
   bool isCaboChaAvailable() const;
 
 private:
+  struct PreparedText { // Output of prepareText(): sanitized text + ratio gate.
+    std::string cleanText;             // sanitizeUTF8() result; empty => nothing to analyze
+    double japaneseRatio{0.0};         // calculateJapaneseRatio(cleanText)
+    bool belowMinJapaneseRatio{false}; // set only when the ratio check was requested
+  };
+
+  PreparedText prepareText(const std::string &text,
+                           bool enforceMinJapaneseRatio) const;
+  std::vector<TokenData> analyzePreparedText(const PreparedText &prepared);
+
   std::unique_ptr<mecab::MeCabManager> mecab_manager_;
   MoZukuConfig config_;
   std::string system_charset_;
diff --git a/mozuku-lsp/include/encoding_utils.hpp b/mozuku-lsp/include/encoding_utils.hpp
index 3260a75..c13ec83 100644
--- a/mozuku-lsp/include/encoding_utils.hpp
+++ b/mozuku-lsp/include/encoding_utils.hpp
@@ -5,9 +5,14 @@
 namespace MoZuku {
 namespace encoding {
 
+struct ConversionOptions {
+  bool skipInvalidInput{false};
+};
+
 std::string convertEncoding(const std::string &input,
                             const std::string &fromCharset,
-                            const std::string &toCharset = "UTF-8");
+                            const std::string &toCharset = "UTF-8",
+                            ConversionOptions options = {});
 
 std::string systemToUtf8(const std::string &input,
                          const std::string &systemCharset);
@@ -15,5 +20,9 @@ std::string systemToUtf8(const std::string &input,
 std::string utf8ToSystem(const std::string &input,
                          const std::string &systemCharset);
 
+std::string sanitizeUtf8(const std::string &input);
+
+size_t utf8SequenceLength(unsigned char c);
+
 } // namespace encoding
 } // namespace MoZuku
diff --git a/mozuku-lsp/include/grammar_checker.hpp b/mozuku-lsp/include/grammar_checker.hpp
index 60d68f2..f76e76f 100644
--- a/mozuku-lsp/include/grammar_checker.hpp
+++ b/mozuku-lsp/include/grammar_checker.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
-#include "analyzer.hpp"
-#include "lsp.hpp"
+#include "mozuku/core/config.hpp"
+#include "mozuku/core/types.hpp"
 #include <string>
 #include <vector>
 
diff --git a/mozuku-lsp/include/lsp.hpp b/mozuku-lsp/include/lsp.hpp
index cc136d4..1cf0f34 100644
--- a/mozuku-lsp/include/lsp.hpp
+++ b/mozuku-lsp/include/lsp.hpp
@@ -1,6 +1,10 @@
 #pragma once
 
 #include "analyzer.hpp"
+#include "mozuku/analysis/document_preprocessor.hpp"
+#include "mozuku/core/config.hpp"
+#include "mozuku/core/types.hpp"
+#include "mozuku/lsp/presenter.hpp"
 #include <cstddef>
 #include <istream>
 #include <memory>
@@ -15,78 +19,35 @@
 
 using json = nlohmann::json;
 
-struct Position {
-  int line{0};
-  int character{0};
-};
-
-struct Range {
-  Position start;
-  Position end;
-};
-
-struct Diagnostic {
-  Range range;
-  int severity{2};
-  std::string message;
-};
-
-struct TokenData {
-  int line{0};
-  int startChar{0};
-  int endChar{0};
-  std::string tokenType; // e.g. "noun", "verb" ...
-  unsigned int tokenModifiers{0};
-
-  std::string surface; // 表層形
-  std::string
-      feature; // 品詞,品詞細分類1,品詞細分類2,品詞細分類3,活用型,活用形,原形,読み,発音
-  std::string baseForm;      // 原形
-  std::string reading;       // 読み
-  std::string pronunciation; // 発音
-};
-
-struct AnalyzerResult {
-  std::vector<TokenData> tokens;
-  std::vector<Diagnostic> diags;
-};
-
-struct ByteRange {
-  size_t startByte{0};
-  size_t endByte{0};
-};
-
 class LSPServer {
 public:
   LSPServer(std::istream &in, std::ostream &out);
   void run();
 
 private:
+  struct DocumentState { // Everything tracked for one document (keyed by URI).
+    std::string text;       // full document text
+    std::string languageId; // document language ID
+    std::vector<TokenData> tokens; // token data used for hover
+    bool tokensCached{false}; // NOTE(review): presumably true once tokens is filled — confirm
+    std::unordered_map<int, std::vector<Diagnostic>> diagnosticsByLine; // line -> diagnostics
+    std::vector<MoZuku::comments::CommentSegment> commentSegments; // used for comment analysis
+    std::vector<ByteRange> contentHighlightRanges; // HTML/LaTeX body highlight ranges
+  };
+
   std::istream &in_;
   std::ostream &out_;
 
-  // インメモリテキストストア: uri -> 全テキスト
-  std::unordered_map<std::string, std::string> docs_;
-  // ドキュメントの言語ID: uri -> languageId
-  std::unordered_map<std::string, std::string> docLanguages_;
-  // hover用トークン情報: uri -> トークンデータ
-  std::unordered_map<std::string, std::vector<TokenData>> docTokens_;
-  // 行ベースの診断キャッシュ: uri -> 行番号 -> 診断情報
-  std::unordered_map<std::string,
-                     std::unordered_map<int, std::vector<Diagnostic>>>
-      docDiagnostics_;
-  // コメント解析に使用するセグメント
-  std::unordered_map<std::string, std::vector<MoZuku::comments::CommentSegment>>
-      docCommentSegments_;
-  // HTML/LaTeX 本文ハイライト用の範囲
-  std::unordered_map<std::string, std::vector<ByteRange>>
-      docContentHighlightRanges_;
+  // Per-document state: uri -> text / analysis results / auxiliary metadata
+  std::unordered_map<std::string, DocumentState> documents_;
   std::vector<std::string> tokenTypes_;
   std::vector<std::string> tokenModifiers_;
 
   MoZukuConfig config_;
 
   std::unique_ptr<MoZuku::Analyzer> analyzer_;
+  MoZuku::analysis::DocumentPreprocessor preprocessor_;
+  MoZuku::lsp::Presenter presenter_;
 
   bool readMessage(std::string &jsonPayload);
   void reply(const json &msg);
@@ -103,26 +64,21 @@ class LSPServer {
   json onSemanticTokensRange(const json &id, const json &params);
   json onHover(const json &id, const json &params);
 
-  void analyzeAndPublish(const std::string &uri, const std::string &text);
+  DocumentState &ensureDocument(const std::string &uri);
+  DocumentState *findDocument(const std::string &uri);
+  const DocumentState *findDocument(const std::string &uri) const;
+  static bool isJapaneseLanguage(const DocumentState &document);
+
+  void analyzeAndPublish(const std::string &uri);
   void analyzeChangedLines(const std::string &uri, const std::string &newText,
                            const std::string &oldText);
-  std::string prepareAnalysisText(const std::string &uri,
-                                  const std::string &text);
-  void sendCommentHighlights(
-      const std::string &uri, const std::string &text,
-      const std::vector<MoZuku::comments::CommentSegment> &segments);
-  void sendSemanticHighlights(const std::string &uri,
-                              const std::vector<TokenData> &tokens);
-  void sendContentHighlights(const std::string &uri, const std::string &text,
-                             const std::vector<ByteRange> &ranges);
+  MoZuku::analysis::ProcessedDocument prepareDocument(DocumentState &document);
   json buildSemanticTokens(const std::string &uri);
-  json buildSemanticTokensFromTokens(const std::vector<TokenData> &tokens);
 
-  void cacheDiagnostics(const std::string &uri,
+  void cacheDiagnostics(DocumentState &document,
                         const std::vector<Diagnostic> &diags);
-  void removeDiagnosticsForLines(const std::string &uri,
+  void removeDiagnosticsForLines(DocumentState &document,
                                  const std::set<int> &lines);
-  std::vector<Diagnostic> getAllDiagnostics(const std::string &uri) const;
   std::set<int> findChangedLines(const std::string &oldText,
                                  const std::string &newText) const;
 };
diff --git a/mozuku-lsp/include/mozuku/analysis/document_preprocessor.hpp b/mozuku-lsp/include/mozuku/analysis/document_preprocessor.hpp
new file mode 100644
index 0000000..56038cf
--- /dev/null
+++ b/mozuku-lsp/include/mozuku/analysis/document_preprocessor.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "comment_extractor.hpp"
+#include "mozuku/core/types.hpp"
+
+#include <string>
+#include <vector>
+
+namespace MoZuku::analysis {
+
+struct ProcessedDocument { // Return value of DocumentPreprocessor::prepare().
+  std::string analysisText; // NOTE(review): presumably the text passed on to analysis — confirm
+  std::vector<comments::CommentSegment> commentSegments; // comment segments found in the input
+  std::vector<ByteRange> contentHighlightRanges; // byte ranges for content highlighting
+};
+
+class DocumentPreprocessor { // Declares no data members; prepare() is const.
+public:
+  ProcessedDocument prepare(const std::string &languageId,
+                            const std::string &text) const;
+};
+
+} // namespace MoZuku::analysis
diff --git a/mozuku-lsp/include/mozuku/core/config.hpp b/mozuku-lsp/include/mozuku/core/config.hpp
new file mode 100644
index 0000000..0be9ef6
--- /dev/null
+++ b/mozuku-lsp/include/mozuku/core/config.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <string>
+
+namespace MoZuku::core {
+
+struct MeCabConfig {
+  std::string dicPath;           // Dictionary directory path
+  std::string charset = "UTF-8"; // Character encoding
+};
+
+struct AnalysisConfig {
+  bool enableCaboCha = true;     // Enable CaboCha dependency parsing
+  bool grammarCheck = true;      // Enable grammar diagnostics
+  double minJapaneseRatio = 0.1; // Minimum Japanese character ratio; <= 0 disables the check
+
+  struct RuleToggles { // Per-rule switches (bool) and their numeric limits (int)
+    bool commaLimit = true;
+    bool adversativeGa = true;
+    bool duplicateParticleSurface = true;
+    bool adjacentParticles = true;
+    bool conjunctionRepeat = true;
+    bool raDropping = true;
+    int commaLimitMax = 3;
+    int adversativeGaMax = 1;
+    int duplicateParticleSurfaceMaxRepeat = 1;
+    int adjacentParticlesMaxRepeat = 1;
+    int conjunctionRepeatMax = 1;
+  } rules;
+
+  struct WarningLevels { // Enhanced grammar warning settings
+    bool particleDuplicate = true;  // Duplicate-particle warning
+    bool particleSequence = true;   // Inappropriate particle sequence
+    bool particleMismatch = true;   // Verb/particle mismatch
+    bool sentenceStructure = false; // Sentence-structure issues (experimental)
+    bool styleConsistency = false;  // Mixed writing styles (experimental)
+    bool redundancy = false;        // Redundant expressions (experimental)
+  } warnings;
+
+  int warningMinSeverity = 2; // Minimum warning level (1=Error, 2=Warning, 3=Info, 4=Hint)
+};
+
+struct MoZukuConfig { // Configuration shared between the LSP server and the analyzer
+  MeCabConfig mecab;
+  AnalysisConfig analysis;
+};
+
+} // namespace MoZuku::core
+
+using MeCabConfig = MoZuku::core::MeCabConfig;
+using AnalysisConfig = MoZuku::core::AnalysisConfig;
+using MoZukuConfig = MoZuku::core::MoZukuConfig;
diff --git a/mozuku-lsp/include/mozuku/core/debug.hpp b/mozuku-lsp/include/mozuku/core/debug.hpp
new file mode 100644
index 0000000..112e13c
--- /dev/null
+++ b/mozuku-lsp/include/mozuku/core/debug.hpp
@@ -0,0 +1,12 @@
+#pragma once
+
+#include <cstdlib>
+
+namespace MoZuku::debug {
+
+inline bool isEnabled() { // True when MOZUKU_DEBUG is set (any value).
+  static const bool enabled = std::getenv("MOZUKU_DEBUG") != nullptr; // evaluated once
+  return enabled;
+}
+
+} // namespace MoZuku::debug
diff --git a/mozuku-lsp/include/mozuku/core/types.hpp b/mozuku-lsp/include/mozuku/core/types.hpp
new file mode 100644
index 0000000..73dc5ed
--- /dev/null
+++ b/mozuku-lsp/include/mozuku/core/types.hpp
@@ -0,0 +1,106 @@
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace MoZuku::core {
+
+struct Position {
+  int line{0};
+  int character{0};
+};
+
+struct Range {
+  Position start;
+  Position end;
+};
+
+struct Diagnostic {
+  Range range;
+  int severity{2};
+  std::string message;
+};
+
+struct TokenData {
+  int line{0};
+  int startChar{0};
+  int endChar{0}; // Analyzer sets this to startChar + UTF-16 length of surface
+  std::string tokenType; // e.g. "noun", "verb" ...
+  unsigned int tokenModifiers{0}; // NOTE(review): presumably MoZukuModifiers bits — confirm
+
+  std::string surface;       // Surface form
+  std::string feature;       // POS,sub1,sub2,sub3,conjugation type,conjugation form,base form,reading,pronunciation
+  std::string baseForm;      // Base form
+  std::string reading;       // Reading
+  std::string pronunciation; // Pronunciation
+};
+
+struct AnalyzerResult {
+  std::vector<TokenData> tokens;
+  std::vector<Diagnostic> diags;
+};
+
+struct ByteRange {
+  size_t startByte{0};
+  size_t endByte{0};
+};
+
+struct DetailedPOS {
+  std::string mainPOS;       // Main part of speech (noun, verb, particle, ...)
+  std::string subPOS1;       // POS subcategory 1 (case/adverbial/binding particle, ...)
+  std::string subPOS2;       // POS subcategory 2
+  std::string subPOS3;       // POS subcategory 3
+  std::string inflection;    // Conjugation type
+  std::string conjugation;   // Conjugation form
+  std::string baseForm;      // Base form
+  std::string reading;       // Reading
+  std::string pronunciation; // Pronunciation
+
+  bool isParticle() const { return mainPOS == "助詞"; } // "助詞" = particle
+  bool isVerb() const { return mainPOS == "動詞"; }     // "動詞" = verb
+  bool isNoun() const { return mainPOS == "名詞"; }     // "名詞" = noun
+};
+
+struct ParticleInfo { // Information about a particle token
+  std::string surface;  // Surface form
+  std::string function; // Case, adverbial, binding, or conjunctive particle
+  std::string role;     // More detailed role
+  size_t position{0};   // Position within the sentence (in bytes)
+  int tokenIndex{0};    // Index into the token array
+  int sentenceId{0};    // ID of the sentence this particle belongs to
+};
+
+struct SentenceBoundary { // Sentence boundary information
+  size_t start{0};   // Sentence start position (in bytes)
+  size_t end{0};     // Sentence end position (in bytes)
+  int sentenceId{0}; // Sentence ID
+  std::string text;  // Sentence content
+};
+
+struct DependencyInfo { // Dependency parsing information from CaboCha
+  int chunkId{0};    // Chunk ID
+  int headId{0};     // ID of the chunk this chunk depends on
+  double score{0.0}; // Dependency score
+  std::string text;  // Chunk text
+};
+
+} // namespace MoZuku::core
+
+using Position = MoZuku::core::Position;
+using Range = MoZuku::core::Range;
+using Diagnostic = MoZuku::core::Diagnostic;
+using TokenData = MoZuku::core::TokenData;
+using AnalyzerResult = MoZuku::core::AnalyzerResult;
+using ByteRange = MoZuku::core::ByteRange;
+using DetailedPOS = MoZuku::core::DetailedPOS;
+using ParticleInfo = MoZuku::core::ParticleInfo;
+using SentenceBoundary = MoZuku::core::SentenceBoundary;
+using DependencyInfo = MoZuku::core::DependencyInfo;
+
+namespace MoZukuModifiers {
+static constexpr unsigned Proper = 1u << 0;  // "proper"
+static constexpr unsigned Numeric = 1u << 1; // "numeric"
+static constexpr unsigned Kana = 1u << 2;    // "kana"
+static constexpr unsigned Kanji = 1u << 3;   // "kanji"
+} // namespace MoZukuModifiers
diff --git a/mozuku-lsp/include/mozuku/lsp/presenter.hpp b/mozuku-lsp/include/mozuku/lsp/presenter.hpp
new file mode 100644
index 0000000..346d4af
--- /dev/null
+++ b/mozuku-lsp/include/mozuku/lsp/presenter.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "comment_extractor.hpp"
+#include "mozuku/core/types.hpp"
+
+#include <nlohmann/json.hpp>
+#include <string>
+#include <vector>
+
+namespace MoZuku::lsp {
+
+class Presenter { // No data members; every method is const and returns JSON.
+public:
+  using json = nlohmann::json;
+
+  json publishDiagnosticsParams(const std::string &uri,
+                                const std::vector<Diagnostic> &diags) const;
+
+  json commentHighlightsParams(
+      const std::string &uri, const std::string &text,
+      const std::vector<comments::CommentSegment> &segments) const;
+
+  json contentHighlightsParams(const std::string &uri, const std::string &text,
+                               const std::vector<ByteRange> &ranges) const;
+
+  json semanticHighlightsParams(const std::string &uri, bool isJapanese,
+                                const std::vector<TokenData> &tokens) const;
+
+  json semanticTokensData(const std::vector<TokenData> &tokens,
+                          const std::vector<std::string> &tokenTypes) const;
+
+  json hoverResult(const TokenData &token, const std::string &markdown) const;
+};
+
+} // namespace MoZuku::lsp
diff --git a/mozuku-lsp/include/mozuku/treesitter/document.hpp b/mozuku-lsp/include/mozuku/treesitter/document.hpp
new file mode 100644
index 0000000..de395e1
--- /dev/null
+++ b/mozuku-lsp/include/mozuku/treesitter/document.hpp
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <tree_sitter/api.h>
+
+namespace MoZuku::treesitter {
+
+const TSLanguage *resolveLanguage(const std::string &languageId);
+bool isLanguageSupported(const std::string &languageId);
+
+class ParsedDocument { // Move-only holder of a tree-sitter TSTree.
+public:
+  ParsedDocument();
+  ParsedDocument(const std::string &languageId, const std::string &text);
+  ParsedDocument(const TSLanguage *language, const std::string &text);
+
+  ParsedDocument(ParsedDocument &&other) noexcept = default;
+  ParsedDocument &operator=(ParsedDocument &&other) noexcept = default;
+
+  ParsedDocument(const ParsedDocument &) = delete; // copying is disabled
+  ParsedDocument &operator=(const ParsedDocument &) = delete;
+
+  bool isValid() const;
+  TSNode root() const;
+
+private:
+  std::unique_ptr<TSTree, void (*)(TSTree *)> tree_; // owned; freed via the function-pointer deleter
+};
+
+template <typename Visitor> // Visitor is invoked as visitor(TSNode) -> bool-like
+void walkDepthFirst(TSNode root, Visitor &&visitor) {
+  if (ts_node_is_null(root)) {
+    return;
+  }
+  // Explicit stack instead of recursion; a node is visited before its children.
+  std::vector<TSNode> stack;
+  stack.push_back(root);
+
+  while (!stack.empty()) {
+    TSNode node = stack.back();
+    stack.pop_back();
+
+    if (ts_node_is_null(node)) {
+      continue;
+    }
+    // A falsy visitor result skips this node's children.
+    if (!visitor(node)) {
+      continue;
+    }
+    // Push children last-to-first so the first child is popped next.
+    uint32_t childCount = ts_node_child_count(node);
+    for (uint32_t i = childCount; i > 0; --i) {
+      TSNode child = ts_node_child(node, i - 1);
+      if (!ts_node_is_null(child)) {
+        stack.push_back(child);
+      }
+    }
+  }
+}
+
+} // namespace MoZuku::treesitter
diff --git a/mozuku-lsp/include/pos_analyzer.hpp b/mozuku-lsp/include/pos_analyzer.hpp
index e11e014..81fe0d8 100644
--- a/mozuku-lsp/include/pos_analyzer.hpp
+++ b/mozuku-lsp/include/pos_analyzer.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "analyzer.hpp"
+#include "mozuku/core/types.hpp"
 #include <string>
 #include <vector>
 
@@ -10,6 +10,11 @@ namespace pos {
 class POSAnalyzer {
 public:
   static std::string mapPosToType(const char *feature);
+  static bool isNounFeature(const std::string &feature);
+  static bool isParticleFeature(const std::string &feature);
+  static bool isConjunctionFeature(const std::string &feature);
+  static bool isAdversativeGaFeature(const std::string &feature);
+  static std::string particleKey(const std::string &feature);
 
   static void parseFeatureDetails(const char *feature, std::string &baseForm,
                                   std::string &reading,
@@ -24,6 +29,8 @@ class POSAnalyzer {
                                    size_t length, const char *feature);
 
 private:
+  static std::vector<std::string>
+  parseFeatureFields(const std::string &feature);
   static std::vector<std::string> splitFeature(const std::string &feature);
 
   static void analyzeCharacterTypes(const std::string &text, size_t start,
diff --git a/mozuku-lsp/include/text_processor.hpp b/mozuku-lsp/include/text_processor.hpp
index a197e27..d3354c6 100644
--- a/mozuku-lsp/include/text_processor.hpp
+++ b/mozuku-lsp/include/text_processor.hpp
@@ -1,8 +1,9 @@
 #pragma once
 
-#include "analyzer.hpp"
+#include "mozuku/core/types.hpp"
 #include <string>
 #include <vector>
+#include <cstdint>
 
 namespace MoZuku {
 namespace text {
diff --git a/mozuku-lsp/include/utf16.hpp b/mozuku-lsp/include/utf16.hpp
index 24984a7..06f0a9a 100644
--- a/mozuku-lsp/include/utf16.hpp
+++ b/mozuku-lsp/include/utf16.hpp
@@ -1,6 +1,24 @@
 #pragma once
 
-#include "lsp.hpp"
+#include "mozuku/core/types.hpp"
+
+#include <string>
+#include <vector>
+
+class TextOffsetMapper { // Maps between byte offsets and Position for one string.
+public:
+  explicit TextOffsetMapper(const std::string &text);
+
+  const std::vector<size_t> &lineStarts() const;
+  Position byteOffsetToPosition(size_t offset) const;
+  size_t positionToByteOffset(int line, int character) const;
+  size_t positionToByteOffset(const Position &position) const;
+  size_t tokenStartByteOffset(const TokenData &token) const;
+
+private:
+  const std::string &text_; // held by reference: the string must outlive the mapper
+  std::vector<size_t> line_starts_;
+};
 
 std::vector<size_t> computeLineStarts(const std::string &text);
 
@@ -8,4 +26,10 @@ Position byteOffsetToPosition(const std::string &text,
                               const std::vector<size_t> &lineStarts,
                               size_t offset);
 
+size_t positionToByteOffset(const std::string &text,
+                            const std::vector<size_t> &lineStarts, int line,
+                            int character);
+
+size_t positionToByteOffset(const std::string &text, int line, int character);
+
 size_t utf8ToUtf16Length(const std::string &utf8Str);
diff --git a/mozuku-lsp/src/analyzer.cpp b/mozuku-lsp/src/analyzer.cpp
index 4b80368..7ea8045 100644
--- a/mozuku-lsp/src/analyzer.cpp
+++ b/mozuku-lsp/src/analyzer.cpp
@@ -2,29 +2,19 @@
 #include "encoding_utils.hpp"
 #include "grammar_checker.hpp"
 #include "mecab_manager.hpp"
+#include "mozuku/core/debug.hpp"
 #include "pos_analyzer.hpp"
 #include "text_processor.hpp"
 #include "utf16.hpp"
 
 #include <cabocha.h>
-#include <cstdlib>
 #include <iostream>
 #include <mecab.h>
 
 namespace MoZuku {
 
-static bool isDebugEnabled() {
-  static bool initialized = false;
-  static bool debug = false;
-  if (!initialized) {
-    debug = (std::getenv("MOZUKU_DEBUG") != nullptr);
-    initialized = true;
-  }
-  return debug;
-}
-
 Analyzer::Analyzer() {
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Analyzer created" << std::endl;
   }
 }
@@ -36,7 +26,7 @@ bool Analyzer::initialize(const MoZukuConfig &config) {
   mecab_manager_ =
       std::make_unique<mecab::MeCabManager>(config.analysis.enableCaboCha);
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Initializing analyzer with config" << std::endl;
   }
 
@@ -52,7 +42,7 @@ bool Analyzer::initialize(const MoZukuConfig &config) {
 
   system_charset_ = mecab_manager_->getSystemCharset();
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Analyzer initialized successfully with charset: "
               << system_charset_ << std::endl;
   }
@@ -60,31 +50,38 @@ bool Analyzer::initialize(const MoZukuConfig &config) {
   return true;
 }
 
-std::vector<TokenData> Analyzer::analyzeText(const std::string &text) {
-  std::vector<TokenData> tokens;
-
+Analyzer::PreparedText
+Analyzer::prepareText(const std::string &text,
+                      bool enforceMinJapaneseRatio) const {
+  PreparedText prepared;
   if (text.empty()) {
-    return tokens;
+    return prepared;
   }
 
-  if (isDebugEnabled()) {
-    std::cerr << "[DEBUG] Analyzing text of length: " << text.size()
-              << std::endl;
+  prepared.cleanText = text::TextProcessor::sanitizeUTF8(text);
+  if (prepared.cleanText.empty()) {
+    return prepared;
   }
 
-  std::string cleanText = text::TextProcessor::sanitizeUTF8(text);
-  double japaneseRatio = text::TextProcessor::calculateJapaneseRatio(cleanText);
-  if (config_.analysis.minJapaneseRatio > 0.0 &&
-      japaneseRatio < config_.analysis.minJapaneseRatio) {
-    if (isDebugEnabled()) {
-      std::cerr << "[DEBUG] Skipping analysis due to low Japanese ratio: "
-                << japaneseRatio << " < " << config_.analysis.minJapaneseRatio
-                << std::endl;
-    }
+  prepared.japaneseRatio =
+      text::TextProcessor::calculateJapaneseRatio(prepared.cleanText);
+  prepared.belowMinJapaneseRatio =
+      enforceMinJapaneseRatio && config_.analysis.minJapaneseRatio > 0.0 &&
+      prepared.japaneseRatio < config_.analysis.minJapaneseRatio;
+
+  return prepared;
+}
+
+std::vector<TokenData>
+Analyzer::analyzePreparedText(const PreparedText &prepared) {
+  std::vector<TokenData> tokens;
+
+  if (prepared.cleanText.empty()) {
     return tokens;
   }
 
-  std::string systemText = encoding::utf8ToSystem(cleanText, system_charset_);
+  std::string systemText =
+      encoding::utf8ToSystem(prepared.cleanText, system_charset_);
 
   MeCab::Tagger *tagger = mecab_manager_->getMeCabTagger();
   if (!tagger) {
@@ -98,7 +95,7 @@ std::vector<TokenData> Analyzer::analyzeText(const std::string &text) {
     return tokens;
   }
 
-  std::vector<size_t> lineStarts = computeLineStarts(cleanText);
+  TextOffsetMapper offsetMapper(prepared.cleanText);
 
   size_t currentBytePos = 0;
 
@@ -116,17 +113,17 @@ std::vector<TokenData> Analyzer::analyzeText(const std::string &text) {
     if (token.surface.empty())
       continue;
 
-    while (currentBytePos < cleanText.size()) {
-      size_t remainingBytes = cleanText.size() - currentBytePos;
+    while (currentBytePos < prepared.cleanText.size()) {
+      size_t remainingBytes = prepared.cleanText.size() - currentBytePos;
       if (remainingBytes >= token.surface.size() &&
-          cleanText.substr(currentBytePos, token.surface.size()) ==
+          prepared.cleanText.substr(currentBytePos, token.surface.size()) ==
               token.surface) {
         break;
       }
       currentBytePos++;
     }
 
-    Position pos = byteOffsetToPosition(cleanText, lineStarts, currentBytePos);
+    Position pos = offsetMapper.byteOffsetToPosition(currentBytePos);
     token.line = pos.line;
     token.startChar = pos.character;
     token.endChar = pos.character + utf8ToUtf16Length(token.surface);
@@ -142,13 +139,14 @@ std::vector<TokenData> Analyzer::analyzeText(const std::string &text) {
 
     token.tokenType = pos::POSAnalyzer::mapPosToType(token.feature.c_str());
     token.tokenModifiers = pos::POSAnalyzer::computeModifiers(
-        cleanText, currentBytePos, token.surface.size(), token.feature.c_str());
+        prepared.cleanText, currentBytePos, token.surface.size(),
+        token.feature.c_str());
 
     tokens.push_back(token);
     currentBytePos += token.surface.size();
   }
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Analysis completed: " << tokens.size()
               << " tokens generated" << std::endl;
   }
@@ -156,6 +154,25 @@ std::vector<TokenData> Analyzer::analyzeText(const std::string &text) {
   return tokens;
 }
 
+std::vector<TokenData> Analyzer::analyzeText(const std::string &text) {
+  if (debug::isEnabled()) {
+    std::cerr << "[DEBUG] Analyzing text of length: " << text.size()
+              << std::endl;
+  }
+  // Sanitize the input and evaluate the minimum-Japanese-ratio gate.
+  PreparedText prepared = prepareText(text, true);
+  if (prepared.belowMinJapaneseRatio) {
+    if (debug::isEnabled()) {
+      std::cerr << "[DEBUG] Skipping analysis due to low Japanese ratio: "
+                << prepared.japaneseRatio << " < "
+                << config_.analysis.minJapaneseRatio << std::endl;
+    }
+    return {};
+  }
+  // Tokenize the sanitized text; an empty cleanText yields no tokens.
+  return analyzePreparedText(prepared);
+}
+
 std::vector<Diagnostic> Analyzer::checkGrammar(const std::string &text) {
   std::vector<Diagnostic> diagnostics;
 
@@ -163,31 +180,29 @@ std::vector<Diagnostic> Analyzer::checkGrammar(const std::string &text) {
     return diagnostics;
   }
 
-  std::string cleanText = text::TextProcessor::sanitizeUTF8(text);
-  double japaneseRatio = text::TextProcessor::calculateJapaneseRatio(cleanText);
-  if (config_.analysis.minJapaneseRatio > 0.0 &&
-      japaneseRatio < config_.analysis.minJapaneseRatio) {
-    if (isDebugEnabled()) {
+  PreparedText prepared = prepareText(text, true);
+  if (prepared.belowMinJapaneseRatio) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] Skipping grammar check due to low Japanese ratio: "
-                << japaneseRatio << " < " << config_.analysis.minJapaneseRatio
-                << std::endl;
+                << prepared.japaneseRatio << " < "
+                << config_.analysis.minJapaneseRatio << std::endl;
     }
     return diagnostics;
   }
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Starting grammar check" << std::endl;
   }
 
-  std::vector<TokenData> tokens = analyzeText(text);
+  std::vector<TokenData> tokens = analyzePreparedText(prepared);
 
   std::vector<SentenceBoundary> sentences =
-      text::TextProcessor::splitIntoSentences(text);
+      text::TextProcessor::splitIntoSentences(prepared.cleanText);
 
-  grammar::GrammarChecker::checkGrammar(text, tokens, sentences, diagnostics,
-                                        &config_);
+  grammar::GrammarChecker::checkGrammar(prepared.cleanText, tokens, sentences,
+                                        diagnostics, &config_);
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Grammar check completed: " << diagnostics.size()
               << " diagnostics generated" << std::endl;
   }
@@ -200,19 +215,24 @@ Analyzer::analyzeDependencies(const std::string &text) {
   std::vector<DependencyInfo> dependencies;
 
   if (!mecab_manager_->isCaboChaAvailable()) {
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] CaboCha not available for dependency analysis"
                 << std::endl;
     }
     return dependencies;
   }
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Starting dependency analysis" << std::endl;
   }
 
-  std::string cleanText = text::TextProcessor::sanitizeUTF8(text);
-  std::string systemText = encoding::utf8ToSystem(cleanText, system_charset_);
+  PreparedText prepared = prepareText(text, false);
+  if (prepared.cleanText.empty()) {
+    return dependencies;
+  }
+
+  std::string systemText =
+      encoding::utf8ToSystem(prepared.cleanText, system_charset_);
 
   cabocha_t *parser = mecab_manager_->getCaboChaParser();
   if (!parser) {
@@ -258,7 +278,7 @@ Analyzer::analyzeDependencies(const std::string &text) {
     dependencies.push_back(dep);
   }
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Dependency analysis completed: "
               << dependencies.size() << " chunks found" << std::endl;
   }
@@ -277,24 +297,3 @@ bool Analyzer::isCaboChaAvailable() const {
 }
 
 } // namespace MoZuku
-
-size_t computeByteOffset(const std::string &text, int line, int character) {
-  std::vector<size_t> lineStarts = computeLineStarts(text);
-  if (line >= static_cast<int>(lineStarts.size())) {
-    return text.size();
-  }
-
-  size_t lineStart = lineStarts[line];
-  size_t bytePos = lineStart;
-  int utf16Pos = 0;
-
-  while (bytePos < text.size() && utf16Pos < character &&
-         text[bytePos] != '\n') {
-    unsigned char c = static_cast<unsigned char>(text[bytePos]);
-    int seqLen = (c >= 0xF0) ? 4 : (c >= 0xE0) ? 3 : (c >= 0xC0) ? 2 : 1;
-    bytePos += seqLen;
-    utf16Pos += (seqLen == 4) ? 2 : 1;
-  }
-
-  return bytePos;
-}
diff --git a/mozuku-lsp/src/comment_extractor.cpp b/mozuku-lsp/src/comment_extractor.cpp
index a45bc7e..a093419 100644
--- a/mozuku-lsp/src/comment_extractor.cpp
+++ b/mozuku-lsp/src/comment_extractor.cpp
@@ -1,55 +1,13 @@
 #include "comment_extractor.hpp"
+#include "mozuku/treesitter/document.hpp"
 
-#include <algorithm>
 #include <cctype>
-#include <memory>
 #include <string>
 #include <string_view>
-#include <unordered_map>
 #include <vector>
 
-#include <tree_sitter/api.h>
-
-extern "C" {
-const TSLanguage *tree_sitter_c();
-const TSLanguage *tree_sitter_cpp();
-const TSLanguage *tree_sitter_html();
-const TSLanguage *tree_sitter_javascript();
-const TSLanguage *tree_sitter_python();
-const TSLanguage *tree_sitter_rust();
-const TSLanguage *tree_sitter_typescript();
-const TSLanguage *tree_sitter_tsx();
-const TSLanguage *tree_sitter_latex();
-}
-
 namespace {
 
-using LanguageFactory = const TSLanguage *(*)();
-
-const std::unordered_map<std::string, LanguageFactory> &languageMap() {
-  static const std::unordered_map<std::string, LanguageFactory> map = {
-      {"c", tree_sitter_c},
-      {"cpp", tree_sitter_cpp},
-      {"html", tree_sitter_html},
-      {"c++", tree_sitter_cpp},
-      {"javascript", tree_sitter_javascript},
-      {"javascriptreact", tree_sitter_tsx},
-      {"typescript", tree_sitter_typescript},
-      {"typescriptreact", tree_sitter_tsx},
-      {"tsx", tree_sitter_tsx},
-      {"python", tree_sitter_python},
-      {"rust", tree_sitter_rust},
-      {"latex", tree_sitter_latex}};
-  return map;
-}
-
-std::string toLower(std::string input) {
-  std::transform(
-      input.begin(), input.end(), input.begin(),
-      [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
-  return input;
-}
-
 inline bool isNewline(char c) { return c == '\n' || c == '\r'; }
 
 inline void setSpace(char &c) {
@@ -216,64 +174,22 @@ namespace MoZuku {
 namespace comments {
 
 const TSLanguage *resolveLanguage(const std::string &languageId) {
-  const auto &map = languageMap();
-  auto it = map.find(toLower(languageId));
-  if (it == map.end()) {
-    return nullptr;
-  }
-  return it->second();
+  return treesitter::resolveLanguage(languageId);
 }
 
 bool isLanguageSupported(const std::string &languageId) {
-  const auto &map = languageMap();
-  return map.find(toLower(languageId)) != map.end();
+  return treesitter::isLanguageSupported(languageId);
 }
 
 std::vector<CommentSegment> extractComments(const std::string &languageId,
                                             const std::string &text) {
   std::vector<CommentSegment> segments;
-
-  const TSLanguage *language = resolveLanguage(languageId);
-  if (!language) {
-    return segments;
-  }
-
-  TSParser *parser = ts_parser_new();
-  if (!parser) {
+  treesitter::ParsedDocument document(languageId, text);
+  if (!document.isValid()) {
     return segments;
   }
 
-  std::unique_ptr<TSParser, decltype(&ts_parser_delete)> parserGuard(
-      parser, &ts_parser_delete);
-
-  if (!ts_parser_set_language(parser, language)) {
-    return segments;
-  }
-
-  TSTree *tree =
-      ts_parser_parse_string(parser, nullptr, text.c_str(), text.size());
-  if (!tree) {
-    return segments;
-  }
-
-  std::unique_ptr<TSTree, decltype(&ts_tree_delete)> treeGuard(tree,
-                                                               &ts_tree_delete);
-
-  TSNode root = ts_tree_root_node(tree);
-  if (ts_node_is_null(root)) {
-    return segments;
-  }
-  std::vector<TSNode> stack;
-  stack.push_back(root);
-
-  while (!stack.empty()) {
-    TSNode node = stack.back();
-    stack.pop_back();
-
-    if (ts_node_is_null(node)) {
-      continue;
-    }
-
+  treesitter::walkDepthFirst(document.root(), [&](TSNode node) {
     const char *type = ts_node_type(node);
     if (type) {
       std::string_view nodeType(type);
@@ -290,18 +206,11 @@ std::vector<CommentSegment> extractComments(const std::string &languageId,
           segment.sanitized = std::move(segmentText);
           segments.push_back(std::move(segment));
         }
-        continue;
+        return false;
       }
     }
-
-    uint32_t childCount = ts_node_child_count(node);
-    for (uint32_t i = 0; i < childCount; ++i) {
-      TSNode child = ts_node_child(node, i);
-      if (!ts_node_is_null(child)) {
-        stack.push_back(child);
-      }
-    }
-  }
+    return true;
+  });
 
   return segments;
 }
diff --git a/mozuku-lsp/src/document_preprocessor.cpp b/mozuku-lsp/src/document_preprocessor.cpp
new file mode 100644
index 0000000..dc3b240
--- /dev/null
+++ b/mozuku-lsp/src/document_preprocessor.cpp
@@ -0,0 +1,335 @@
+#include "mozuku/analysis/document_preprocessor.hpp"
+#include "encoding_utils.hpp"
+#include "mozuku/treesitter/document.hpp"
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+
+namespace {
+
+struct LocalByteRange { // byte span inside the document text
+  size_t startByte = 0; // offset of the first byte of the span
+  size_t endByte = 0;   // offset one past the last byte of the span
+};
+
+// True when an odd number of backslashes immediately precedes text[pos].
+bool isEscaped(const std::string &text, size_t pos) {
+  bool escaped = false;
+  for (size_t i = pos; i > 0 && text[i - 1] == '\\'; --i)
+    escaped = !escaped; // each preceding backslash flips the parity
+  return escaped;
+}
+
+// Returns the index of the first unescaped '$' at or after pos, or npos.
+size_t findClosingDollar(const std::string &text, size_t pos) {
+  size_t hit = text.find('$', pos);
+  while (hit != std::string::npos && isEscaped(text, hit)) {
+    hit = text.find('$', hit + 1); // escaped dollar: keep searching
+  }
+  return hit;
+}
+
+// Returns the index of the first unescaped "$$" at or after pos, or npos.
+size_t findClosingDoubleDollar(const std::string &text, size_t pos) {
+  size_t hit = text.find("$$", pos);
+  while (hit != std::string::npos && isEscaped(text, hit)) {
+    hit = text.find("$$", hit + 1); // leading '$' is escaped: look further
+  }
+  return hit;
+}
+
+// Blanks the introducer of a LaTeX comment: the first byte, any '%' that
+// directly follow it, and the run of spaces/tabs after those all become ' '.
+// The length is unchanged, so byte offsets into the result stay aligned.
+std::string sanitizeLatexCommentText(const std::string &raw) {
+  std::string sanitized = raw;
+  if (sanitized.empty()) {
+    return sanitized;
+  }
+
+  sanitized[0] = ' ';
+  size_t idx = 1;
+  for (; idx < sanitized.size() && sanitized[idx] == '%'; ++idx) {
+    sanitized[idx] = ' ';
+  }
+  const size_t stop =
+      std::min(sanitized.find_first_not_of(" \t", idx), sanitized.size());
+  std::fill(sanitized.begin() + idx, sanitized.begin() + stop, ' ');
+  return sanitized;
+}
+
+// Collects the '%' comments of a LaTeX document, one segment per line that
+// contains an unescaped '%'.
+//   startByte - offset of that '%'
+//   endByte   - offset of the end of the line (the '\n' is not included)
+//   sanitized - the comment text passed through sanitizeLatexCommentText
+// Only the first unescaped '%' of a line starts a segment.
+std::vector<MoZuku::comments::CommentSegment>
+collectLatexComments(const std::string &text) {
+  std::vector<MoZuku::comments::CommentSegment> segments;
+
+  for (size_t lineStart = 0; lineStart < text.size();) {
+    const size_t newline = text.find('\n', lineStart);
+    const size_t lineEnd =
+        (newline == std::string::npos) ? text.size() : newline;
+
+    // Find the first '%' on this line that is not backslash-escaped.
+    size_t marker = lineStart;
+    while (marker < lineEnd &&
+           (text[marker] != '%' || isEscaped(text, marker))) {
+      ++marker;
+    }
+
+    if (marker < lineEnd) {
+      MoZuku::comments::CommentSegment segment;
+      segment.startByte = marker;
+      segment.endByte = lineEnd;
+      segment.sanitized =
+          sanitizeLatexCommentText(text.substr(marker, lineEnd - marker));
+      segments.push_back(std::move(segment));
+    }
+
+    // Step past the '\n'. After the final line this lands at or beyond
+    // text.size(), which ends the loop.
+    lineStart = lineEnd + 1;
+  }
+
+  return segments;
+}
+
+// Parses text as HTML and returns the byte span of every "text" node after
+// trimming surrounding whitespace. Whitespace-only nodes and spans that do
+// not fit inside text are dropped; an invalid parse yields no ranges.
+std::vector<LocalByteRange> collectHtmlContentRanges(const std::string &text) {
+  std::vector<LocalByteRange> ranges;
+  MoZuku::treesitter::ParsedDocument document("html", text);
+  if (!document.isValid()) {
+    return ranges;
+  }
+
+  const auto isSpaceAt = [&text](size_t index) {
+    return std::isspace(static_cast<unsigned char>(text[index])) != 0;
+  };
+  MoZuku::treesitter::walkDepthFirst(document.root(), [&](TSNode node) {
+    const char *type = ts_node_type(node);
+    if (!type || std::strcmp(type, "text") != 0) {
+      return true; // not a text node: keep walking into its children
+    }
+    size_t first = ts_node_start_byte(node);
+    size_t last = ts_node_end_byte(node);
+    if (first < last && last <= text.size()) {
+      while (first < last && isSpaceAt(first)) {
+        ++first;
+      }
+      while (last > first && isSpaceAt(last - 1)) {
+        --last;
+      }
+      if (last > first) {
+        ranges.push_back({first, last});
+      }
+    }
+    return false; // text node handled: do not descend into it
+  });
+
+  return ranges;
+}
+
+// Collects the byte spans of prose runs in LaTeX source. Skipped: unescaped
+// '%' comments (through end of line), $...$ and $$...$$ math, control
+// sequences (backslash, then letters/'@', then an optional '*'), braces, and
+// ASCII whitespace/punctuation. Scanning stops early at an unterminated math
+// delimiter or at a comment with no trailing newline.
+// Returns [startByte, endByte) spans in document order, all within text.
+std::vector<LocalByteRange> collectLatexContentRanges(const std::string &text) {
+  std::vector<LocalByteRange> ranges;
+  size_t i = 0;
+  while (i < text.size()) {
+    unsigned char c = static_cast<unsigned char>(text[i]);
+    // Unescaped '%': a comment that runs through the end of the line.
+    if (c == '%' && !isEscaped(text, i)) {
+      size_t lineEnd = text.find('\n', i);
+      if (lineEnd == std::string::npos) {
+        break;
+      }
+      i = lineEnd + 1;
+      continue;
+    }
+    if (c == '$' && !isEscaped(text, i)) {
+      // "$$" opens display math, a lone '$' opens inline math.
+      const bool display = i + 1 < text.size() && text[i + 1] == '$';
+      const size_t closing = display ? findClosingDoubleDollar(text, i + 2)
+                                     : findClosingDollar(text, i + 1);
+      if (closing == std::string::npos) {
+        break;
+      }
+      i = closing + (display ? 2 : 1);
+      continue;
+    }
+    if (c == '\\') {
+      // Skip the control sequence name; a non-letter after the backslash is
+      // left for the next iteration.
+      ++i;
+      while (i < text.size()) {
+        unsigned char ch = static_cast<unsigned char>(text[i]);
+        if (!std::isalpha(ch) && ch != '@') {
+          break;
+        }
+        ++i;
+      }
+      if (i < text.size() && text[i] == '*') {
+        ++i;
+      }
+      continue;
+    }
+    if (c == '{' || c == '}' || std::isspace(c)) {
+      ++i;
+      continue;
+    }
+
+    // Consume one run of content bytes, stepping by whole UTF-8 sequences.
+    const size_t start = i;
+    while (i < text.size()) {
+      unsigned char d = static_cast<unsigned char>(text[i]);
+      if (d == '\\' || d == '$' || d == '{' || d == '}' ||
+          (d == '%' && !isEscaped(text, i))) {
+        break;
+      }
+      if (d < 0x80 && (std::isspace(d) || std::ispunct(d))) {
+        break;
+      }
+      // A lead byte truncated by the end of the text must not push the span
+      // past text.size(): these spans are also reported as highlight ranges.
+      i = std::min(text.size(), i + MoZuku::encoding::utf8SequenceLength(d));
+    }
+    if (i > start) {
+      ranges.push_back({start, i});
+      continue;
+    }
+    ++i; // a single ASCII punctuation byte: step over it
+  }
+
+  return ranges;
+}
+
+// Converts file-local byte spans into ByteRange values, preserving order.
+std::vector<ByteRange> toByteRanges(const std::vector<LocalByteRange> &ranges) {
+  std::vector<ByteRange> converted;
+  converted.reserve(ranges.size());
+  for (size_t idx = 0; idx < ranges.size(); ++idx)
+    converted.push_back(ByteRange{ranges[idx].startByte, ranges[idx].endByte});
+  return converted;
+}
+
+// Appends the startByte/endByte pair of every comment segment to ranges.
+void appendCommentRanges(
+    std::vector<ByteRange> &ranges,
+    const std::vector<MoZuku::comments::CommentSegment> &segments) {
+  ranges.reserve(ranges.size() + segments.size());
+  for (auto it = segments.cbegin(); it != segments.cend(); ++it)
+    ranges.push_back(ByteRange{it->startByte, it->endByte});
+}
+
+// Builds an analysis string the same length as text in which every byte is a
+// space except line breaks ('\n', '\r'), the bytes inside contentRanges
+// (copied from text), and the comment segments (their sanitized text written
+// at startByte). Spans that start past the end are ignored; longer ones are
+// cut at the end of the text. Comments are written last and so win overlaps.
+std::string buildMaskWithContentRanges(
+    const std::string &text, const std::vector<LocalByteRange> &contentRanges,
+    const std::vector<MoZuku::comments::CommentSegment> &commentSegments) {
+  std::string masked(text.size(), ' ');
+  for (size_t i = 0; i < text.size(); ++i) {
+    if (text[i] == '\n' || text[i] == '\r') {
+      masked[i] = text[i];
+    }
+  }
+
+  // Reveal the prose content first.
+  for (const auto &range : contentRanges) {
+    if (range.startByte < masked.size()) {
+      const size_t len = std::min(range.endByte - range.startByte,
+                                  masked.size() - range.startByte);
+      masked.replace(range.startByte, len, text, range.startByte, len);
+    }
+  }
+
+  for (const auto &segment : commentSegments) {
+    if (segment.startByte < masked.size()) {
+      const size_t len = std::min(segment.sanitized.size(),
+                                  masked.size() - segment.startByte);
+      masked.replace(segment.startByte, len, segment.sanitized, 0, len);
+    }
+  }
+
+  return masked;
+}
+
+// Builds a string the same length as text that keeps only line breaks and the
+// sanitized text of each comment segment (written at the segment's
+// startByte, cut at the end of the document); every other byte is a space.
+std::string buildCommentOnlyMask(
+    const std::string &text,
+    const std::vector<MoZuku::comments::CommentSegment> &segments) {
+  std::string masked(text.size(), ' ');
+  for (size_t i = 0; i < text.size(); ++i) {
+    if (text[i] == '\n' || text[i] == '\r') {
+      masked[i] = text[i];
+    }
+  }
+
+  for (const auto &segment : segments) {
+    if (segment.startByte >= masked.size()) {
+      continue; // segment starts outside the document
+    }
+    const size_t room = masked.size() - segment.startByte;
+    const size_t count = std::min(room, segment.sanitized.size());
+    masked.replace(segment.startByte, count, segment.sanitized, 0, count);
+  }
+
+  return masked;
+}
+
+} // namespace
+
+namespace MoZuku::analysis {
+
+// Prepares a document for analysis according to its language id:
+//  - empty or "japanese": the text is analyzed unchanged;
+//  - "html" / "latex": comments plus prose content stay visible, the rest is
+//    masked, and both kinds of span are reported as highlight ranges;
+//  - other languages known to the comment extractor: only comments stay
+//    visible;
+//  - anything else: the text is analyzed unchanged.
+ProcessedDocument DocumentPreprocessor::prepare(const std::string &languageId,
+                                                const std::string &text) const {
+  ProcessedDocument result;
+  result.analysisText = text;
+
+  const bool isHtml = languageId == "html";
+  const bool isLatex = languageId == "latex";
+  if (isHtml || isLatex) {
+    result.commentSegments = isHtml
+                                 ? comments::extractComments(languageId, text)
+                                 : collectLatexComments(text);
+    const std::vector<LocalByteRange> contentRanges =
+        isHtml ? collectHtmlContentRanges(text)
+               : collectLatexContentRanges(text);
+    result.contentHighlightRanges = toByteRanges(contentRanges);
+    appendCommentRanges(result.contentHighlightRanges, result.commentSegments);
+    result.analysisText =
+        buildMaskWithContentRanges(text, contentRanges, result.commentSegments);
+    return result;
+  }
+
+  const bool plainText = languageId.empty() || languageId == "japanese";
+  if (plainText || !comments::isLanguageSupported(languageId)) {
+    return result;
+  }
+
+  result.commentSegments = comments::extractComments(languageId, text);
+  result.analysisText = buildCommentOnlyMask(text, result.commentSegments);
+  return result;
+}
+
+} // namespace MoZuku::analysis
diff --git a/mozuku-lsp/src/encoding_utils.cpp b/mozuku-lsp/src/encoding_utils.cpp
index 89f6ea0..b46cc47 100644
--- a/mozuku-lsp/src/encoding_utils.cpp
+++ b/mozuku-lsp/src/encoding_utils.cpp
@@ -1,44 +1,141 @@
 #include "encoding_utils.hpp"
+
+#include <array>
+#include <cctype>
+#include <cerrno>
 #include <cstring>
 #include <iconv.h>
 
+namespace {
+
+// Reduces a charset name to its upper-cased alphanumerics so that spellings
+// such as "utf-8", "UTF_8" and "Utf8" all compare equal ("UTF8").
+std::string normalizeCharsetName(const std::string &charset) {
+  std::string normalized;
+  normalized.reserve(charset.size());
+  for (const unsigned char ch : charset)
+    if (std::isalnum(ch) != 0)
+      normalized += static_cast<char>(std::toupper(ch));
+  return normalized;
+}
+
+bool isUtf8Charset(const std::string &charset) { // accepts any UTF-8 spelling
+  return normalizeCharsetName(charset).compare("UTF8") == 0;
+}
+
+bool isSameCharset(const std::string &lhs, const std::string &rhs) {
+  return normalizeCharsetName(lhs).compare(normalizeCharsetName(rhs)) == 0;
+} // i.e. equal once case and non-alphanumeric characters are ignored
+
+// Closes the wrapped iconv descriptor on scope exit; non-copyable.
+struct IconvCloser {
+  explicit IconvCloser(iconv_t handle) : handle_(handle) {}
+  IconvCloser(const IconvCloser &) = delete; // a copy would close twice
+  IconvCloser &operator=(const IconvCloser &) = delete;
+  ~IconvCloser() {
+    if (handle_ != (iconv_t)-1) // (iconv_t)-1 is iconv_open's failure value
+      iconv_close(handle_);
+  }
+  iconv_t get() const { return handle_; }
+private:
+  iconv_t handle_;
+};
+
+void appendBuffer(std::string &result, const std::array<char, 256> &buffer,
+                  size_t remaining) { // remaining = unused bytes at the tail
+  result.append(buffer.begin(), buffer.end() - remaining);
+}
+
+// Removes control bytes below 0x20 other than tab, LF and CR, in place.
+void stripUnsupportedControlChars(std::string &text) {
+  size_t kept = 0;
+  for (const char raw : text) {
+    const auto c = static_cast<unsigned char>(raw);
+    if (c >= 0x20 || c == '\t' || c == '\n' || c == '\r')
+      text[kept++] = raw; // compact survivors toward the front
+  }
+  text.resize(kept);
+}
+
+} // namespace
+
 namespace MoZuku {
 namespace encoding {
 
 std::string convertEncoding(const std::string &input,
                             const std::string &fromCharset,
-                            const std::string &toCharset) {
+                            const std::string &toCharset,
+                            ConversionOptions options) {
   if (input.empty())
     return input;
 
+  if (!options.skipInvalidInput && isSameCharset(fromCharset, toCharset)) {
+    return input;
+  }
+
   iconv_t cd = iconv_open(toCharset.c_str(), fromCharset.c_str());
   if (cd == (iconv_t)-1) {
     return input;
   }
+  IconvCloser guard(cd);
 
+  char *inBuf = const_cast<char *>(input.data());
   size_t inBytesLeft = input.size();
-  size_t outBytesLeft = input.size() * 4; // Conservative estimate
+  std::string result;
+  result.reserve(input.size() * 2 + 16);
+  std::array<char, 256> outputBuffer{};
 
-  std::string result(outBytesLeft, '\0');
+  while (true) {
+    char *outBuf = outputBuffer.data();
+    size_t outBytesLeft = outputBuffer.size();
+    size_t status =
+        iconv(guard.get(), &inBuf, &inBytesLeft, &outBuf, &outBytesLeft);
+    appendBuffer(result, outputBuffer, outBytesLeft);
 
-  char *inBuf = const_cast<char *>(input.data());
-  char *outBuf = &result[0];
+    if (status != static_cast<size_t>(-1)) {
+      break;
+    }
+
+    if (errno == E2BIG) {
+      continue;
+    }
+
+    if (options.skipInvalidInput && (errno == EILSEQ || errno == EINVAL)) {
+      if (inBytesLeft == 0) {
+        break;
+      }
+      ++inBuf;
+      --inBytesLeft;
+      continue;
+    }
 
-  if (iconv(cd, &inBuf, &inBytesLeft, &outBuf, &outBytesLeft) == (size_t)-1) {
-    iconv_close(cd);
     return input;
   }
 
-  iconv_close(cd);
+  while (true) {
+    char *outBuf = outputBuffer.data();
+    size_t outBytesLeft = outputBuffer.size();
+    size_t status =
+        iconv(guard.get(), nullptr, nullptr, &outBuf, &outBytesLeft);
+    appendBuffer(result, outputBuffer, outBytesLeft);
+
+    if (status != static_cast<size_t>(-1)) {
+      break;
+    }
+
+    if (errno == E2BIG) {
+      continue;
+    }
+
+    return input;
+  }
 
-  // Resize result to actual converted size
-  result.resize(result.size() - outBytesLeft);
   return result;
 }
 
 std::string systemToUtf8(const std::string &input,
                          const std::string &systemCharset) {
-  if (systemCharset == "UTF-8" || systemCharset.empty()) {
+  if (systemCharset.empty() || isUtf8Charset(systemCharset)) {
     return input;
   }
   return convertEncoding(input, systemCharset, "UTF-8");
@@ -46,11 +143,34 @@ std::string systemToUtf8(const std::string &input,
 
 std::string utf8ToSystem(const std::string &input,
                          const std::string &systemCharset) {
-  if (systemCharset == "UTF-8" || systemCharset.empty()) {
+  if (systemCharset.empty() || isUtf8Charset(systemCharset)) {
     return input;
   }
   return convertEncoding(input, "UTF-8", systemCharset);
 }
 
+std::string sanitizeUtf8(const std::string &input) {
+  const ConversionOptions options{true}; // presumably skipInvalidInput: confirm
+  std::string sanitized = convertEncoding(input, "UTF-8", "UTF-8", options);
+  stripUnsupportedControlChars(sanitized);
+  return sanitized;
+}
+
+// Length in bytes of the UTF-8 sequence introduced by lead byte c (2-4 for
+// multi-byte leads); ASCII, continuation bytes and invalid leads yield 1.
+size_t utf8SequenceLength(unsigned char c) {
+  switch (c >> 4) {
+  case 0xC:
+  case 0xD:
+    return 2; // 110xxxxx
+  case 0xE:
+    return 3; // 1110xxxx
+  case 0xF:
+    return (c & 0x08) == 0 ? 4 : 1; // 11110xxx, otherwise not a valid lead
+  default:
+    return 1; // ASCII or continuation byte
+  }
+}
+
 } // namespace encoding
 } // namespace MoZuku
diff --git a/mozuku-lsp/src/grammar_checker.cpp b/mozuku-lsp/src/grammar_checker.cpp
index a8ce345..eb51cf8 100644
--- a/mozuku-lsp/src/grammar_checker.cpp
+++ b/mozuku-lsp/src/grammar_checker.cpp
@@ -1,7 +1,7 @@
 #include "grammar_checker.hpp"
+#include "mozuku/core/debug.hpp"
 #include "pos_analyzer.hpp"
 #include "utf16.hpp"
-#include <cstdlib>
 #include <iostream>
 
 namespace MoZuku {
@@ -13,69 +13,11 @@ struct RuleContext {
   const std::string &text;
   const std::vector<TokenData> &tokens;
   const std::vector<SentenceBoundary> &sentences;
-  const std::vector<size_t> &lineStarts;
+  const TextOffsetMapper &offsets;
   const std::vector<size_t> &tokenBytePositions;
   int severity{2};
 };
 
-bool isAdversativeGa(const std::string &feature) {
-  // MeCab: 品詞,品詞細分類1,品詞細分類2,品詞細分類3,活用型,活用形,原形,...
-  // 逆接の接続助詞「が」: 助詞,接続助詞,*,*,*,*,が,ガ,ガ
-  int fieldIndex = 0;
-  size_t start = 0;
-  size_t end = 0;
-
-  std::string pos, sub1, base;
-  while (end != std::string::npos) {
-    end = feature.find(',', start);
-    std::string part = feature.substr(
-        start, end == std::string::npos ? std::string::npos : end - start);
-    if (fieldIndex == 0)
-      pos = part;
-    else if (fieldIndex == 1)
-      sub1 = part;
-    else if (fieldIndex == 6)
-      base = part;
-
-    if (end == std::string::npos)
-      break;
-    start = end + 1;
-    ++fieldIndex;
-    if (fieldIndex > 6 && !base.empty()) {
-      break;
-    }
-  }
-
-  return pos == "助詞" && sub1 == "接続助詞" && base == "が";
-}
-
-bool isConjunction(const std::string &feature) {
-  size_t comma = feature.find(',');
-  std::string pos =
-      (comma == std::string::npos) ? feature : feature.substr(0, comma);
-  return pos == "接続詞";
-}
-
-bool isParticle(const std::string &feature) {
-  size_t comma = feature.find(',');
-  std::string pos =
-      (comma == std::string::npos) ? feature : feature.substr(0, comma);
-  return pos == "助詞";
-}
-
-std::string particleKey(const std::string &feature) {
-  // "助詞,格助詞,一般,..." -> "助詞,格助詞"
-  size_t firstComma = feature.find(',');
-  if (firstComma == std::string::npos) {
-    return feature;
-  }
-  size_t secondComma = feature.find(',', firstComma + 1);
-  if (secondComma == std::string::npos) {
-    return feature.substr(0, firstComma);
-  }
-  return feature.substr(0, secondComma);
-}
-
 DetailedPOS parsePos(const std::string &feature) {
   return MoZuku::pos::POSAnalyzer::parseDetailedPOS(feature.c_str(), "UTF-8");
 }
@@ -95,44 +37,21 @@ bool isSpecialRaCase(const DetailedPOS &pos) {
          (pos.baseForm == "来れる" || pos.baseForm == "見れる");
 }
 
-// UTF-16ベースのトークン位置をUTF-8バイトオフセットに変換
-size_t toByteOffset(const TokenData &token, const std::string &text,
-                    const std::vector<size_t> &lineStarts) {
-  if (token.line >= static_cast<int>(lineStarts.size())) {
-    return text.size();
-  }
-
-  size_t lineStart = lineStarts[token.line];
-  size_t bytePos = lineStart;
-  int utf16Pos = 0;
-
-  while (bytePos < text.size() && utf16Pos < token.startChar &&
-         text[bytePos] != '\n') {
-    unsigned char c = static_cast<unsigned char>(text[bytePos]);
-    int seqLen = (c >= 0xF0) ? 4 : (c >= 0xE0) ? 3 : (c >= 0xC0) ? 2 : 1;
-    bytePos += seqLen;
-    utf16Pos += (seqLen == 4) ? 2 : 1;
-  }
-
-  return bytePos;
-}
-
 std::vector<size_t>
 computeTokenBytePositions(const std::vector<TokenData> &tokens,
-                          const std::string &text,
-                          const std::vector<size_t> &lineStarts) {
+                          const TextOffsetMapper &offsetMapper) {
   std::vector<size_t> positions;
   positions.reserve(tokens.size());
   for (const auto &token : tokens) {
-    positions.push_back(toByteOffset(token, text, lineStarts));
+    positions.push_back(offsetMapper.tokenStartByteOffset(token));
   }
   return positions;
 }
 
 Range makeRange(const RuleContext &ctx, size_t startByte, size_t endByte) {
   Range range;
-  range.start = byteOffsetToPosition(ctx.text, ctx.lineStarts, startByte);
-  range.end = byteOffsetToPosition(ctx.text, ctx.lineStarts, endByte);
+  range.start = ctx.offsets.byteOffsetToPosition(startByte);
+  range.end = ctx.offsets.byteOffsetToPosition(endByte);
   return range;
 }
 
@@ -159,16 +78,6 @@ size_t countCommas(const std::string &text) {
 
 } // namespace
 
-static bool isDebugEnabled() {
-  static bool initialized = false;
-  static bool debug = false;
-  if (!initialized) {
-    debug = (std::getenv("MOZUKU_DEBUG") != nullptr);
-    initialized = true;
-  }
-  return debug;
-}
-
 void checkCommaLimit(const RuleContext &ctx, std::vector<Diagnostic> &diags,
                      int limit) {
   if (limit <= 0)
@@ -186,7 +95,7 @@ void checkCommaLimit(const RuleContext &ctx, std::vector<Diagnostic> &diags,
     diag.message = "一文に使用できる読点「、」は最大" + std::to_string(limit) +
                    "個までです (現在" + std::to_string(commaCount) + "個) ";
 
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] Comma limit exceeded in sentence "
                 << sentence.sentenceId << ": count=" << commaCount << "\n";
     }
@@ -203,7 +112,7 @@ void checkAdversativeGa(const RuleContext &ctx, std::vector<Diagnostic> &diags,
   for (const auto &sentence : ctx.sentences) {
     size_t count = 0;
     for (size_t i = 0; i < ctx.tokens.size(); ++i) {
-      if (!isAdversativeGa(ctx.tokens[i].feature)) {
+      if (!pos::POSAnalyzer::isAdversativeGaFeature(ctx.tokens[i].feature)) {
         continue;
       }
       size_t bytePos = ctx.tokenBytePositions[i];
@@ -223,7 +132,7 @@ void checkAdversativeGa(const RuleContext &ctx, std::vector<Diagnostic> &diags,
                    std::to_string(maxCount + 1) + "回以上使われています (" +
                    std::to_string(count) + "回) ";
 
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] Adversative 'が' exceeded in sentence "
                 << sentence.sentenceId << ": count=" << count << "\n";
     }
@@ -252,11 +161,11 @@ void checkDuplicateParticleSurface(const RuleContext &ctx,
         continue;
       }
 
-      if (!isParticle(token.feature)) {
+      if (!pos::POSAnalyzer::isParticleFeature(token.feature)) {
         continue;
       }
 
-      std::string currentKey = particleKey(token.feature);
+      std::string currentKey = pos::POSAnalyzer::particleKey(token.feature);
 
       if (hasLast && token.surface == lastSurface && currentKey == lastKey) {
         ++streak;
@@ -267,7 +176,7 @@ void checkDuplicateParticleSurface(const RuleContext &ctx,
           diag.severity = ctx.severity;
           diag.message = "同じ助詞「" + token.surface + "」が連続しています";
 
-          if (isDebugEnabled()) {
+          if (debug::isEnabled()) {
             std::cerr << "[DEBUG] Duplicate particle '" << token.surface
                       << "' in sentence " << sentence.sentenceId << "\n";
           }
@@ -305,8 +214,9 @@ void checkAdjacentParticles(const RuleContext &ctx,
         continue;
       }
 
-      bool currentIsParticle = isParticle(token.feature);
-      std::string currentKey = particleKey(token.feature);
+      bool currentIsParticle =
+          pos::POSAnalyzer::isParticleFeature(token.feature);
+      std::string currentKey = pos::POSAnalyzer::particleKey(token.feature);
       if (currentIsParticle && prevIsParticle && currentKey == prevKey &&
           bytePos == prevStartByte + prevToken.surface.size()) {
         ++streak;
@@ -317,7 +227,7 @@ void checkAdjacentParticles(const RuleContext &ctx,
           diag.severity = ctx.severity;
           diag.message = "助詞が連続して使われています";
 
-          if (isDebugEnabled()) {
+          if (debug::isEnabled()) {
             std::cerr << "[DEBUG] Consecutive particles '" << prevToken.surface
                       << "' -> '" << token.surface << "' in sentence "
                       << sentence.sentenceId << "\n";
@@ -355,7 +265,7 @@ void checkConjunctionRepeats(const RuleContext &ctx,
 
   for (size_t i = 0; i < ctx.tokens.size(); ++i) {
     const auto &token = ctx.tokens[i];
-    if (!isConjunction(token.feature)) {
+    if (!pos::POSAnalyzer::isConjunctionFeature(token.feature)) {
       continue;
     }
 
@@ -374,7 +284,7 @@ void checkConjunctionRepeats(const RuleContext &ctx,
         diag.severity = ctx.severity;
         diag.message = "同じ接続詞「" + token.surface + "」が連続しています";
 
-        if (isDebugEnabled()) {
+        if (debug::isEnabled()) {
           std::cerr << "[DEBUG] Duplicate conjunction '" << token.surface
                     << "' detected across punctuation\n";
         }
@@ -412,7 +322,7 @@ void checkRaDropping(const RuleContext &ctx, std::vector<Diagnostic> &diags) {
     diag.message = messageRa;
     diags.push_back(std::move(diag));
 
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] Ra-dropping special case detected: "
                 << token.surface << "\n";
     }
@@ -436,7 +346,7 @@ void checkRaDropping(const RuleContext &ctx, std::vector<Diagnostic> &diags) {
       diag.message = messageRa;
       diags.push_back(std::move(diag));
 
-      if (isDebugEnabled()) {
+      if (debug::isEnabled()) {
         std::cerr << "[DEBUG] Ra-dropping detected between tokens '"
                   << prevToken.surface << "' + '" << token.surface << "'\n";
       }
@@ -456,9 +366,9 @@ void GrammarChecker::checkGrammar(
     return;
   }
 
-  std::vector<size_t> lineStarts = computeLineStarts(text);
+  TextOffsetMapper offsetMapper(text);
   std::vector<size_t> tokenBytePositions =
-      computeTokenBytePositions(tokens, text, lineStarts);
+      computeTokenBytePositions(tokens, offsetMapper);
 
   // ルール共通設定 (現状は警告レベル固定)
   const int severity = 2; // Warning
@@ -468,7 +378,7 @@ void GrammarChecker::checkGrammar(
     return;
   }
 
-  RuleContext ctx{text,    tokens, sentences, lineStarts, tokenBytePositions,
+  RuleContext ctx{text,    tokens, sentences, offsetMapper, tokenBytePositions,
                   severity};
 
   if (config && config->analysis.rules.commaLimit) {
diff --git a/mozuku-lsp/src/lsp.cpp b/mozuku-lsp/src/lsp.cpp
index 6e0ea84..960595b 100644
--- a/mozuku-lsp/src/lsp.cpp
+++ b/mozuku-lsp/src/lsp.cpp
@@ -1,38 +1,23 @@
 #include "lsp.hpp"
 #include "analyzer.hpp"
 #include "comment_extractor.hpp"
+#include "mozuku/core/debug.hpp"
+#include "pos_analyzer.hpp"
 #include "utf16.hpp"
 #include "wikipedia.hpp"
 
 #include <algorithm>
-#include <cctype>
+#include <cstdlib>
 #include <iostream>
 #include <set>
 #include <sstream>
 #include <string>
 #include <thread>
 
-#include <tree_sitter/api.h>
-
 using nlohmann::json;
 
-static bool isDebugEnabled() {
-  static bool initialized = false;
-  static bool debug = false;
-  if (!initialized) {
-    debug = (std::getenv("MOZUKU_DEBUG") != nullptr);
-    initialized = true;
-  }
-  return debug;
-}
-
 namespace {
 
-struct LocalByteRange {
-  size_t startByte{0};
-  size_t endByte{0};
-};
-
 bool readBoolOption(const json &obj, const char *key, bool &out) {
   if (!obj.contains(key)) {
     return false;
@@ -52,277 +37,6 @@ bool readBoolOption(const json &obj, const char *key, bool &out) {
   return false;
 }
 
-bool isEscaped(const std::string &text, size_t pos) {
-  size_t count = 0;
-  while (pos > count && text[pos - count - 1] == '\\') {
-    ++count;
-  }
-  return (count % 2) == 1;
-}
-
-size_t findClosingDollar(const std::string &text, size_t pos) {
-  for (size_t i = pos; i < text.size(); ++i) {
-    if (text[i] == '$' && !isEscaped(text, i)) {
-      return i;
-    }
-  }
-  return std::string::npos;
-}
-
-size_t findClosingDoubleDollar(const std::string &text, size_t pos) {
-  for (size_t i = pos; i + 1 < text.size(); ++i) {
-    if (text[i] == '$' && text[i + 1] == '$' && !isEscaped(text, i)) {
-      return i;
-    }
-  }
-  return std::string::npos;
-}
-
-size_t findClosingCommand(const std::string &text, size_t pos,
-                          const std::string &closing) {
-  size_t current = pos;
-  while (current < text.size()) {
-    size_t found = text.find(closing, current);
-    if (found == std::string::npos)
-      return std::string::npos;
-    if (!isEscaped(text, found))
-      return found;
-    current = found + closing.size();
-  }
-  return std::string::npos;
-}
-
-std::string processLatexMath(const std::string &text) { return text; }
-
-std::string sanitizeLatexCommentText(const std::string &raw) {
-  if (raw.empty())
-    return raw;
-
-  std::string sanitized = raw;
-  sanitized[0] = ' ';
-  size_t idx = 1;
-  while (idx < sanitized.size() && sanitized[idx] == '%') {
-    sanitized[idx] = ' ';
-    ++idx;
-  }
-  while (idx < sanitized.size() &&
-         (sanitized[idx] == ' ' || sanitized[idx] == '\t')) {
-    sanitized[idx] = ' ';
-    ++idx;
-  }
-  return sanitized;
-}
-
-std::vector<MoZuku::comments::CommentSegment>
-collectLatexComments(const std::string &text) {
-  std::vector<MoZuku::comments::CommentSegment> segments;
-  size_t pos = 0;
-  while (pos < text.size()) {
-    size_t lineStart = pos;
-    size_t lineEnd = text.find('\n', pos);
-    if (lineEnd == std::string::npos)
-      lineEnd = text.size();
-
-    size_t current = lineStart;
-    bool found = false;
-    while (current < lineEnd) {
-      if (text[current] == '%' && !isEscaped(text, current)) {
-        found = true;
-        break;
-      }
-      ++current;
-    }
-
-    if (found) {
-      MoZuku::comments::CommentSegment segment;
-      segment.startByte = current;
-      segment.endByte = lineEnd;
-      segment.sanitized =
-          sanitizeLatexCommentText(text.substr(current, lineEnd - current));
-      segments.push_back(std::move(segment));
-    }
-
-    if (lineEnd >= text.size())
-      break;
-    pos = lineEnd + 1;
-  }
-
-  return segments;
-}
-
-size_t utf8CharLen(unsigned char c) {
-  if (c < 0x80)
-    return 1;
-  if ((c >> 5) == 0x6)
-    return 2;
-  if ((c >> 4) == 0xE)
-    return 3;
-  if ((c >> 3) == 0x1E)
-    return 4;
-  return 1;
-}
-
-std::vector<LocalByteRange> collectHtmlContentRanges(const std::string &text) {
-  std::vector<LocalByteRange> ranges;
-  const TSLanguage *language = MoZuku::comments::resolveLanguage("html");
-  if (!language)
-    return ranges;
-
-  TSParser *parser = ts_parser_new();
-  if (!parser)
-    return ranges;
-
-  std::unique_ptr<TSParser, decltype(&ts_parser_delete)> parserGuard(
-      parser, &ts_parser_delete);
-  if (!ts_parser_set_language(parser, language)) {
-    return ranges;
-  }
-
-  TSTree *tree =
-      ts_parser_parse_string(parser, nullptr, text.c_str(), text.size());
-  if (!tree)
-    return ranges;
-
-  std::unique_ptr<TSTree, decltype(&ts_tree_delete)> treeGuard(tree,
-                                                               &ts_tree_delete);
-
-  TSNode root = ts_tree_root_node(tree);
-  if (ts_node_is_null(root))
-    return ranges;
-
-  std::vector<TSNode> stack;
-  stack.push_back(root);
-
-  while (!stack.empty()) {
-    TSNode node = stack.back();
-    stack.pop_back();
-
-    if (ts_node_is_null(node))
-      continue;
-
-    const char *type = ts_node_type(node);
-    if (type && std::strcmp(type, "text") == 0) {
-      size_t start = ts_node_start_byte(node);
-      size_t end = ts_node_end_byte(node);
-      if (start >= end || end > text.size())
-        continue;
-
-      size_t trimmedStart = start;
-      while (trimmedStart < end &&
-             std::isspace(static_cast<unsigned char>(text[trimmedStart]))) {
-        ++trimmedStart;
-      }
-      size_t trimmedEnd = end;
-      while (trimmedEnd > trimmedStart &&
-             std::isspace(static_cast<unsigned char>(text[trimmedEnd - 1]))) {
-        --trimmedEnd;
-      }
-      if (trimmedEnd > trimmedStart) {
-        ranges.push_back({trimmedStart, trimmedEnd});
-      }
-      continue;
-    }
-
-    uint32_t childCount = ts_node_child_count(node);
-    for (uint32_t i = 0; i < childCount; ++i) {
-      TSNode child = ts_node_child(node, i);
-      if (!ts_node_is_null(child)) {
-        stack.push_back(child);
-      }
-    }
-  }
-
-  return ranges;
-}
-
-std::vector<LocalByteRange> collectLatexContentRanges(const std::string &text) {
-  std::vector<LocalByteRange> ranges;
-  size_t i = 0;
-  while (i < text.size()) {
-    unsigned char c = static_cast<unsigned char>(text[i]);
-    if (c == '%' && !isEscaped(text, i)) {
-      size_t lineEnd = text.find('\n', i);
-      if (lineEnd == std::string::npos)
-        break;
-      i = lineEnd + 1;
-      continue;
-    }
-    if (c == '$' && !isEscaped(text, i)) {
-      if (i + 1 < text.size() && text[i + 1] == '$') {
-        size_t closing = findClosingDoubleDollar(text, i + 2);
-        if (closing == std::string::npos)
-          break;
-        i = closing + 2;
-        continue;
-      } else {
-        size_t closing = findClosingDollar(text, i + 1);
-        if (closing == std::string::npos)
-          break;
-        i = closing + 1;
-        continue;
-      }
-    }
-    if (c == '\\') {
-      ++i;
-      while (i < text.size()) {
-        unsigned char ch = static_cast<unsigned char>(text[i]);
-        if (!std::isalpha(ch) && ch != '@')
-          break;
-        ++i;
-      }
-      if (i < text.size() && text[i] == '*')
-        ++i;
-      continue;
-    }
-    if (c == '{' || c == '}') {
-      ++i;
-      continue;
-    }
-    if (std::isspace(c)) {
-      ++i;
-      continue;
-    }
-
-    size_t start = i;
-    bool advanced = false;
-    while (i < text.size()) {
-      unsigned char d = static_cast<unsigned char>(text[i]);
-      if (d == '\\' || d == '$' || d == '{' || d == '}' ||
-          (d == '%' && !isEscaped(text, i))) {
-        break;
-      }
-      if (d < 0x80) {
-        if (std::isspace(d) || std::ispunct(d))
-          break;
-      }
-      size_t len = utf8CharLen(d);
-      i += len;
-      advanced = true;
-    }
-    if (advanced) {
-      ranges.push_back({start, i});
-      continue;
-    }
-    // ensure progress to avoid infinite loop
-    if (!advanced)
-      ++i;
-  }
-
-  return ranges;
-}
-
-std::vector<LocalByteRange>
-collectContentHighlightRanges(const std::string &languageId,
-                              const std::string &text) {
-  if (languageId == "html") {
-    return collectHtmlContentRanges(text);
-  }
-  if (languageId == "latex") {
-    return collectLatexContentRanges(text);
-  }
-  return {};
-}
-
 } // namespace
 
 LSPServer::LSPServer(std::istream &in, std::ostream &out) : in_(in), out_(out) {
@@ -419,7 +133,7 @@ void LSPServer::run() {
       json req = json::parse(jsonPayload);
       handle(req);
     } catch (const json::parse_error &e) {
-      if (isDebugEnabled()) {
+      if (MoZuku::debug::isEnabled()) {
         std::cerr << "[DEBUG] JSON parse error: " << e.what() << std::endl;
       }
     }
@@ -537,22 +251,48 @@ void LSPServer::onInitialized() {
   // 初期化完了
 }
 
+LSPServer::DocumentState &LSPServer::ensureDocument(const std::string &uri) {
+  return documents_[uri];
+}
+
+LSPServer::DocumentState *LSPServer::findDocument(const std::string &uri) {
+  auto it = documents_.find(uri);
+  return it == documents_.end() ? nullptr : &it->second;
+}
+
+const LSPServer::DocumentState *
+LSPServer::findDocument(const std::string &uri) const {
+  auto it = documents_.find(uri);
+  return it == documents_.end() ? nullptr : &it->second;
+}
+
+bool LSPServer::isJapaneseLanguage(const DocumentState &document) {
+  return document.languageId == "japanese";
+}
+
 void LSPServer::onDidOpen(const json &params) {
   std::string uri = params["textDocument"]["uri"];
   std::string text = params["textDocument"]["text"];
-  docs_[uri] = text;
+  auto &document = ensureDocument(uri);
+  document.text = text;
+  document.tokens.clear();
+  document.tokensCached = false;
+  document.diagnosticsByLine.clear();
   if (params["textDocument"].contains("languageId") &&
       params["textDocument"]["languageId"].is_string()) {
-    docLanguages_[uri] = params["textDocument"]["languageId"];
+    document.languageId = params["textDocument"]["languageId"];
+  } else {
+    document.languageId.clear();
   }
-  analyzeAndPublish(uri, text);
+  analyzeAndPublish(uri);
 }
 
 void LSPServer::onDidChange(const json &params) {
   std::string uri = params["textDocument"]["uri"];
   auto changes = params["contentChanges"];
 
-  std::string &text = docs_[uri];
+  auto &document = ensureDocument(uri);
+  std::string &text = document.text;
   std::string oldText = text;
 
   // 位置を維持するため変更を逆順に適用
@@ -566,8 +306,8 @@ void LSPServer::onDidChange(const json &params) {
       int endLine = range["end"]["line"];
       int endChar = range["end"]["character"];
 
-      size_t startOffset = computeByteOffset(text, startLine, startChar);
-      size_t endOffset = computeByteOffset(text, endLine, endChar);
+      size_t startOffset = positionToByteOffset(text, startLine, startChar);
+      size_t endOffset = positionToByteOffset(text, endLine, endChar);
 
       std::string newText = change["text"];
       text.replace(startOffset, endOffset - startOffset, newText);
@@ -577,25 +317,28 @@ void LSPServer::onDidChange(const json &params) {
     }
   }
 
+  document.tokensCached = false;
+  document.tokens.clear();
+
   // 最適化: 変更された行のみ再解析
   analyzeChangedLines(uri, text, oldText);
 }
 
 void LSPServer::onDidSave(const json &params) {
   std::string uri = params["textDocument"]["uri"];
-  if (docs_.find(uri) != docs_.end()) {
-    analyzeAndPublish(uri, docs_[uri]);
+  if (findDocument(uri) != nullptr) {
+    analyzeAndPublish(uri);
   }
 }
 
 json LSPServer::onSemanticTokensFull(const json &id, const json &params) {
   std::string uri = params["textDocument"]["uri"];
-  if (docs_.find(uri) == docs_.end()) {
+  const auto *document = findDocument(uri);
+  if (!document) {
     return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", nullptr}};
   }
 
-  auto langIt = docLanguages_.find(uri);
-  if (langIt == docLanguages_.end() || langIt->second != "japanese") {
+  if (!isJapaneseLanguage(*document)) {
     return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", nullptr}};
   }
 
@@ -605,12 +348,12 @@ json LSPServer::onSemanticTokensFull(const json &id, const json &params) {
 
 json LSPServer::onSemanticTokensRange(const json &id, const json &params) {
   std::string uri = params["textDocument"]["uri"];
-  if (docs_.find(uri) == docs_.end()) {
+  const auto *document = findDocument(uri);
+  if (!document) {
     return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", nullptr}};
   }
 
-  auto langIt = docLanguages_.find(uri);
-  if (langIt == docLanguages_.end() || langIt->second != "japanese") {
+  if (!isJapaneseLanguage(*document)) {
     return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", nullptr}};
   }
 
@@ -618,70 +361,36 @@ json LSPServer::onSemanticTokensRange(const json &id, const json &params) {
   return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", {{"data", tokens}}}};
 }
 
-bool isNoun(const std::string &tokenType, const std::string &feature) {
-  // tokenTypeが "noun" の場合
-  if (tokenType == "noun") {
-    return true;
-  }
-
-  // MeCabのfeature文字列から品詞を判定
-  // feature形式:
-  // "品詞,品詞細分類1,品詞細分類2,品詞細分類3,活用型,活用形,原形,読み,発音"
-  if (!feature.empty()) {
-    size_t commaPos = feature.find(',');
-    if (commaPos != std::string::npos) {
-      std::string mainPOS = feature.substr(0, commaPos);
-      return mainPOS == "名詞";
-    }
-  }
-
-  return false;
-}
-
 json LSPServer::onHover(const json &id, const json &params) {
   std::string uri = params["textDocument"]["uri"];
-  if (docs_.find(uri) == docs_.end() ||
-      docTokens_.find(uri) == docTokens_.end()) {
+  const auto *document = findDocument(uri);
+  if (!document || !document->tokensCached) {
     return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", nullptr}};
   }
 
   int line = params["position"]["line"];
   int character = params["position"]["character"];
 
-  const auto docIt = docs_.find(uri);
-  if (docIt == docs_.end()) {
-    return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", nullptr}};
-  }
-
   // japanese 以外の言語では、コメント/コンテンツ範囲内でのみ hover を表示
   // (HTML: タグ内テキスト、LaTeX: タグ・数式以外のテキスト、その他: コメント内)
-  auto langIt = docLanguages_.find(uri);
-  bool isJapanese =
-      (langIt != docLanguages_.end() && langIt->second == "japanese");
+  bool isJapanese = isJapaneseLanguage(*document);
 
   if (!isJapanese) {
-    size_t offset = computeByteOffset(docIt->second, line, character);
+    size_t offset = positionToByteOffset(document->text, line, character);
     bool insideComment = false;
-    const auto segmentsIt = docCommentSegments_.find(uri);
-    if (segmentsIt != docCommentSegments_.end()) {
-      for (const auto &segment : segmentsIt->second) {
-        if (offset >= segment.startByte && offset < segment.endByte) {
-          insideComment = true;
-          break;
-        }
+    for (const auto &segment : document->commentSegments) {
+      if (offset >= segment.startByte && offset < segment.endByte) {
+        insideComment = true;
+        break;
       }
     }
 
     bool insideContent = false;
-    if (langIt != docLanguages_.end() &&
-        (langIt->second == "html" || langIt->second == "latex")) {
-      const auto contentIt = docContentHighlightRanges_.find(uri);
-      if (contentIt != docContentHighlightRanges_.end()) {
-        for (const auto &range : contentIt->second) {
-          if (offset >= range.startByte && offset < range.endByte) {
-            insideContent = true;
-            break;
-          }
+    if (document->languageId == "html" || document->languageId == "latex") {
+      for (const auto &range : document->contentHighlightRanges) {
+        if (offset >= range.startByte && offset < range.endByte) {
+          insideContent = true;
+          break;
         }
       }
     }
@@ -692,7 +401,7 @@ json LSPServer::onHover(const json &id, const json &params) {
   }
 
   // 位置にあるトークンを検索
-  const auto &tokens = docTokens_[uri];
+  const auto &tokens = document->tokens;
   for (const auto &token : tokens) {
     if (token.line == line && character >= token.startChar &&
         character < token.endChar) {
@@ -712,7 +421,8 @@ json LSPServer::onHover(const json &id, const json &params) {
       }
 
       // 名詞の場合、Wikipediaサマリを追加
-      if (isNoun(token.tokenType, token.feature)) {
+      if (token.tokenType == "noun" ||
+          MoZuku::pos::POSAnalyzer::isNounFeature(token.feature)) {
         std::string query =
             token.baseForm.empty() ? token.surface : token.baseForm;
 
@@ -730,7 +440,7 @@ json LSPServer::onHover(const json &id, const json &params) {
                             cached_entry->response_code);
           }
         } else {
-          if (isDebugEnabled()) {
+          if (MoZuku::debug::isEnabled()) {
             std::cerr << "[DEBUG] fetching Wikipedia: " << query << std::endl;
           }
 
@@ -739,13 +449,13 @@ json LSPServer::onHover(const json &id, const json &params) {
           std::thread([query, future = std::move(future)]() mutable {
             try {
               auto result = future.get();
-              if (isDebugEnabled()) {
+              if (MoZuku::debug::isEnabled()) {
                 std::cerr << "[DEBUG] Wikipedia取得完了: " << query
                           << ", ステータス: " << result.response_code
                           << std::endl;
               }
             } catch (const std::exception &e) {
-              if (isDebugEnabled()) {
+              if (MoZuku::debug::isEnabled()) {
                 std::cerr << "[DEBUG] Wikipedia取得失敗: " << query
                           << ", エラー: " << e.what() << std::endl;
               }
@@ -754,71 +464,61 @@ json LSPServer::onHover(const json &id, const json &params) {
         }
       }
 
-      return json{
-          {"jsonrpc", "2.0"},
-          {"id", id},
-          {"result",
-           {{"contents", {{"kind", "markdown"}, {"value", markdown.str()}}},
-            {"range",
-             {{"start", {{"line", token.line}, {"character", token.startChar}}},
-              {"end",
-               {{"line", token.line}, {"character", token.endChar}}}}}}}};
+      return json{{"jsonrpc", "2.0"},
+                  {"id", id},
+                  {"result", presenter_.hoverResult(token, markdown.str())}};
     }
   }
 
   return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", nullptr}};
 }
 
-void LSPServer::analyzeAndPublish(const std::string &uri,
-                                  const std::string &text) {
+void LSPServer::analyzeAndPublish(const std::string &uri) {
+  auto &document = ensureDocument(uri);
+  const std::string &text = document.text;
+
   if (!analyzer_->isInitialized()) {
     analyzer_->initialize(config_);
   }
 
-  std::string analysisText = prepareAnalysisText(uri, text);
-
-  std::vector<TokenData> tokens = analyzer_->analyzeText(analysisText);
-  std::vector<Diagnostic> diags = analyzer_->checkGrammar(analysisText);
+  auto prepared = prepareDocument(document);
 
-  docTokens_[uri] = tokens;
-  cacheDiagnostics(uri, diags);
+  std::vector<TokenData> tokens = analyzer_->analyzeText(prepared.analysisText);
+  std::vector<Diagnostic> diags =
+      analyzer_->checkGrammar(prepared.analysisText);
 
-  // 診断情報を配信
-  json diagnostics = json::array();
-  for (const auto &diag : diags) {
-    diagnostics.push_back({{"range",
-                            {{"start",
-                              {{"line", diag.range.start.line},
-                               {"character", diag.range.start.character}}},
-                             {"end",
-                              {{"line", diag.range.end.line},
-                               {"character", diag.range.end.character}}}}},
-                           {"severity", diag.severity},
-                           {"message", diag.message}});
-  }
+  document.tokens = tokens;
+  document.tokensCached = true;
+  cacheDiagnostics(document, diags);
 
   notify("textDocument/publishDiagnostics",
-         {{"uri", uri}, {"diagnostics", diagnostics}});
+         presenter_.publishDiagnosticsParams(uri, diags));
 
   // コンテンツ範囲を通知 (コメント範囲 or HTML/LaTeX のコンテンツ範囲)
   // HTML: タグ内テキスト、LaTeX: タグ・数式以外のテキスト
-  const auto segmentsIt = docCommentSegments_.find(uri);
-  if (segmentsIt != docCommentSegments_.end()) {
-    sendCommentHighlights(uri, text, segmentsIt->second);
+  static const std::vector<MoZuku::comments::CommentSegment> kEmptySegments;
+  if (!document.commentSegments.empty()) {
+    notify("mozuku/commentHighlights",
+           presenter_.commentHighlightsParams(uri, text,
+                                              document.commentSegments));
   } else {
-    static const std::vector<MoZuku::comments::CommentSegment> kEmptySegments;
-    sendCommentHighlights(uri, text, kEmptySegments);
+    notify("mozuku/commentHighlights",
+           presenter_.commentHighlightsParams(uri, text, kEmptySegments));
   }
 
-  const auto contentIt = docContentHighlightRanges_.find(uri);
-  if (contentIt != docContentHighlightRanges_.end()) {
-    sendContentHighlights(uri, text, contentIt->second);
+  static const std::vector<ByteRange> kEmptyContent;
+  if (!document.contentHighlightRanges.empty()) {
+    notify("mozuku/contentHighlights",
+           presenter_.contentHighlightsParams(uri, text,
+                                              document.contentHighlightRanges));
   } else {
-    static const std::vector<ByteRange> kEmptyContent;
-    sendContentHighlights(uri, text, kEmptyContent);
+    notify("mozuku/contentHighlights",
+           presenter_.contentHighlightsParams(uri, text, kEmptyContent));
   }
 
-  sendSemanticHighlights(uri, tokens);
+  bool isJapanese = isJapaneseLanguage(document);
+  notify("mozuku/semanticHighlights",
+         presenter_.semanticHighlightsParams(uri, isJapanese, tokens));
 }
 
 void LSPServer::analyzeChangedLines(const std::string &uri,
@@ -828,319 +528,67 @@ void LSPServer::analyzeChangedLines(const std::string &uri,
   std::set<int> changedLines = findChangedLines(oldText, newText);
 
   // 変更行の診断情報を削除
-  removeDiagnosticsForLines(uri, changedLines);
+  if (auto *document = findDocument(uri)) {
+    removeDiagnosticsForLines(*document, changedLines);
+  }
 
   // 現在は文書全体を再解析
   // TODO: パフォーマンス向上のため行単位の解析を実装
-  analyzeAndPublish(uri, newText);
-}
-
-std::string LSPServer::prepareAnalysisText(const std::string &uri,
-                                           const std::string &text) {
-  auto langIt = docLanguages_.find(uri);
-  if (langIt == docLanguages_.end()) {
-    docCommentSegments_.erase(uri);
-    docContentHighlightRanges_.erase(uri);
-    return text;
-  }
-
-  const std::string &languageId = langIt->second;
-  if (languageId == "japanese") {
-    docCommentSegments_.erase(uri);
-    docContentHighlightRanges_.erase(uri);
-    return text;
-  }
-
-  // HTML: ドキュメント本文をハイライト (<div>text</div> の text 部分)
-  if (languageId == "html") {
-    std::vector<MoZuku::comments::CommentSegment> commentSegments =
-        MoZuku::comments::extractComments(languageId, text);
-    docCommentSegments_[uri] = commentSegments;
-
-    std::vector<LocalByteRange> contentRanges = collectHtmlContentRanges(text);
-    std::vector<ByteRange> contentByteRanges;
-    contentByteRanges.reserve(contentRanges.size());
-    for (const auto &range : contentRanges) {
-      contentByteRanges.push_back(ByteRange{range.startByte, range.endByte});
-    }
-    // コメントも本文ハイライト対象に含める (クライアント側で装飾しやすくする)
-    for (const auto &segment : commentSegments) {
-      contentByteRanges.push_back(
-          ByteRange{segment.startByte, segment.endByte});
-    }
-    docContentHighlightRanges_[uri] = std::move(contentByteRanges);
-
-    // 全体をマスクしてコンテンツ部分のみ復元
-    std::string masked = text;
-    for (char &ch : masked) {
-      if (ch != '\n' && ch != '\r') {
-        ch = ' ';
-      }
-    }
-
-    for (const auto &range : contentRanges) {
-      if (range.startByte >= masked.size())
-        continue;
-      size_t len = std::min(range.endByte - range.startByte,
-                            masked.size() - range.startByte);
-      for (size_t i = 0; i < len; ++i) {
-        masked[range.startByte + i] = text[range.startByte + i];
-      }
-    }
-
-    for (const auto &segment : commentSegments) {
-      if (segment.startByte >= masked.size())
-        continue;
-      size_t len =
-          std::min(segment.sanitized.size(), masked.size() - segment.startByte);
-      for (size_t i = 0; i < len; ++i) {
-        masked[segment.startByte + i] = segment.sanitized[i];
-      }
-    }
-
-    return masked;
-  }
-
-  // LaTeX: ドキュメント本文をハイライト (タグ・数式を除くテキスト部分)
-  if (languageId == "latex") {
-    std::vector<MoZuku::comments::CommentSegment> commentSegments =
-        collectLatexComments(text);
-    docCommentSegments_[uri] = commentSegments;
-
-    std::vector<LocalByteRange> contentRanges = collectLatexContentRanges(text);
-    std::vector<ByteRange> contentByteRanges;
-    contentByteRanges.reserve(contentRanges.size());
-    for (const auto &range : contentRanges) {
-      contentByteRanges.push_back(ByteRange{range.startByte, range.endByte});
-    }
-    for (const auto &segment : commentSegments) {
-      contentByteRanges.push_back(
-          ByteRange{segment.startByte, segment.endByte});
-    }
-    docContentHighlightRanges_[uri] = std::move(contentByteRanges);
-
-    // 全体をマスクしてコンテンツ部分のみ復元
-    std::string masked = text;
-    for (char &ch : masked) {
-      if (ch != '\n' && ch != '\r') {
-        ch = ' ';
-      }
-    }
-
-    for (const auto &range : contentRanges) {
-      if (range.startByte >= masked.size())
-        continue;
-      size_t len = std::min(range.endByte - range.startByte,
-                            masked.size() - range.startByte);
-      for (size_t i = 0; i < len; ++i) {
-        masked[range.startByte + i] = text[range.startByte + i];
-      }
-    }
-
-    for (const auto &segment : commentSegments) {
-      if (segment.startByte >= masked.size())
-        continue;
-      size_t len =
-          std::min(segment.sanitized.size(), masked.size() - segment.startByte);
-      for (size_t i = 0; i < len; ++i) {
-        masked[segment.startByte + i] = segment.sanitized[i];
-      }
-    }
-
-    return masked;
-  }
-
-  if (!MoZuku::comments::isLanguageSupported(languageId)) {
-    docCommentSegments_.erase(uri);
-    docContentHighlightRanges_.erase(uri);
-    return text;
-  }
-
-  // その他の言語: コメント部分をハイライト
-  std::vector<MoZuku::comments::CommentSegment> segments =
-      MoZuku::comments::extractComments(languageId, text);
-  docCommentSegments_[uri] = segments;
-  docContentHighlightRanges_.erase(uri);
-
-  std::string masked = text;
-  for (char &ch : masked) {
-    if (ch != '\n' && ch != '\r') {
-      ch = ' ';
-    }
-  }
-
-  if (segments.empty()) {
-    return masked;
-  }
-
-  const size_t docSize = masked.size();
-  for (const auto &segment : segments) {
-    if (segment.startByte >= docSize) {
-      continue;
-    }
-    const std::string &sanitized = segment.sanitized;
-    size_t maxCopy = std::min(docSize - segment.startByte, sanitized.size());
-    for (size_t i = 0; i < maxCopy; ++i) {
-      masked[segment.startByte + i] = sanitized[i];
-    }
-  }
-
-  return masked;
-}
-
-void LSPServer::sendCommentHighlights(
-    const std::string &uri, const std::string &text,
-    const std::vector<MoZuku::comments::CommentSegment> &segments) {
-  json ranges = json::array();
-
-  std::vector<size_t> lineStarts = computeLineStarts(text);
-  for (const auto &segment : segments) {
-    Position start = byteOffsetToPosition(text, lineStarts, segment.startByte);
-    Position end = byteOffsetToPosition(text, lineStarts, segment.endByte);
-
-    json range = {
-        {"start", {{"line", start.line}, {"character", start.character}}},
-        {"end", {{"line", end.line}, {"character", end.character}}}};
-    ranges.push_back(std::move(range));
-  }
-
-  notify("mozuku/commentHighlights", {{"uri", uri}, {"ranges", ranges}});
-}
-
-void LSPServer::sendContentHighlights(const std::string &uri,
-                                      const std::string &text,
-                                      const std::vector<ByteRange> &ranges) {
-  json lspRanges = json::array();
-
-  std::vector<size_t> lineStarts = computeLineStarts(text);
-  for (const auto &range : ranges) {
-    Position start = byteOffsetToPosition(text, lineStarts, range.startByte);
-    Position end = byteOffsetToPosition(text, lineStarts, range.endByte);
-
-    lspRanges.push_back(
-        {{"start", {{"line", start.line}, {"character", start.character}}},
-         {"end", {{"line", end.line}, {"character", end.character}}}});
-  }
-
-  notify("mozuku/contentHighlights", {{"uri", uri}, {"ranges", lspRanges}});
+  analyzeAndPublish(uri);
 }
 
-void LSPServer::sendSemanticHighlights(const std::string &uri,
-                                       const std::vector<TokenData> &tokens) {
-  auto langIt = docLanguages_.find(uri);
-  bool isJapanese =
-      (langIt != docLanguages_.end() && langIt->second == "japanese");
-
-  // japanese の場合のみセマンティックハイライトを無効化
-  // (.ja.txt, .ja.md は LSP 側のセマンティックトークンを使用)
-  // HTML/LaTeX など他の言語は VS Code 拡張側の上塗りハイライトを使用
-  if (isJapanese) {
-    notify("mozuku/semanticHighlights",
-           {{"uri", uri}, {"tokens", json::array()}});
-    return;
+MoZuku::analysis::ProcessedDocument
+LSPServer::prepareDocument(DocumentState &document) {
+  if (document.languageId.empty()) {
+    document.commentSegments.clear();
+    document.contentHighlightRanges.clear();
+    return {document.text, {}, {}};
   }
 
-  json tokenEntries = json::array();
-  for (const auto &token : tokens) {
-    tokenEntries.push_back(
-        {{"range",
-          {{"start", {{"line", token.line}, {"character", token.startChar}}},
-           {"end", {{"line", token.line}, {"character", token.endChar}}}}},
-         {"type", token.tokenType},
-         {"modifiers", token.tokenModifiers}});
-  }
+  auto prepared = preprocessor_.prepare(document.languageId, document.text);
+  document.commentSegments = prepared.commentSegments;
+  document.contentHighlightRanges = prepared.contentHighlightRanges;
 
-  notify("mozuku/semanticHighlights", {{"uri", uri}, {"tokens", tokenEntries}});
+  return prepared;
 }
 
 json LSPServer::buildSemanticTokens(const std::string &uri) {
-  auto docIt = docs_.find(uri);
-  if (docIt == docs_.end()) {
+  auto *document = findDocument(uri);
+  if (!document) {
     return json::array();
   }
 
-  auto cached = docTokens_.find(uri);
-  if (cached != docTokens_.end()) {
-    return buildSemanticTokensFromTokens(cached->second);
+  if (document->tokensCached) {
+    return presenter_.semanticTokensData(document->tokens, tokenTypes_);
   }
 
   if (!analyzer_->isInitialized()) {
     analyzer_->initialize(config_);
   }
 
-  std::string analysisText = prepareAnalysisText(uri, docIt->second);
-  std::vector<TokenData> tokens = analyzer_->analyzeText(analysisText);
-  docTokens_[uri] = tokens;
-
-  return buildSemanticTokensFromTokens(tokens);
-}
-
-json LSPServer::buildSemanticTokensFromTokens(
-    const std::vector<TokenData> &tokens) {
-  json data = json::array();
+  auto prepared = prepareDocument(*document);
+  std::vector<TokenData> tokens = analyzer_->analyzeText(prepared.analysisText);
+  document->tokens = tokens;
+  document->tokensCached = true;
 
-  int prevLine = 0, prevChar = 0;
-
-  for (const auto &token : tokens) {
-    int deltaLine = token.line - prevLine;
-    int deltaChar =
-        (deltaLine == 0) ? token.startChar - prevChar : token.startChar;
-
-    auto typeIt =
-        std::find(tokenTypes_.begin(), tokenTypes_.end(), token.tokenType);
-    int typeIndex =
-        (typeIt != tokenTypes_.end())
-            ? static_cast<int>(std::distance(tokenTypes_.begin(), typeIt))
-            : 0;
-
-    data.push_back(deltaLine);
-    data.push_back(deltaChar);
-    data.push_back(token.endChar - token.startChar);
-    data.push_back(typeIndex);
-    data.push_back(token.tokenModifiers);
-
-    prevLine = token.line;
-    prevChar = token.startChar;
-  }
-
-  return data;
+  return presenter_.semanticTokensData(document->tokens, tokenTypes_);
 }
 
-void LSPServer::cacheDiagnostics(const std::string &uri,
+void LSPServer::cacheDiagnostics(DocumentState &document,
                                  const std::vector<Diagnostic> &diags) {
-  docDiagnostics_[uri].clear();
+  document.diagnosticsByLine.clear();
 
   for (const auto &diag : diags) {
     int line = diag.range.start.line;
-    docDiagnostics_[uri][line].push_back(diag);
+    document.diagnosticsByLine[line].push_back(diag);
   }
 }
 
-void LSPServer::removeDiagnosticsForLines(const std::string &uri,
+void LSPServer::removeDiagnosticsForLines(DocumentState &document,
                                           const std::set<int> &lines) {
-  if (docDiagnostics_.find(uri) == docDiagnostics_.end())
-    return;
-
-  auto &uriDiags = docDiagnostics_[uri];
   for (int line : lines) {
-    uriDiags.erase(line);
-  }
-}
-
-std::vector<Diagnostic>
-LSPServer::getAllDiagnostics(const std::string &uri) const {
-  std::vector<Diagnostic> allDiags;
-
-  auto uriIt = docDiagnostics_.find(uri);
-  if (uriIt != docDiagnostics_.end()) {
-    for (const auto &linePair : uriIt->second) {
-      for (const auto &diag : linePair.second) {
-        allDiags.push_back(diag);
-      }
-    }
+    document.diagnosticsByLine.erase(line);
   }
-
-  return allDiags;
 }
 
 std::set<int> LSPServer::findChangedLines(const std::string &oldText,
diff --git a/mozuku-lsp/src/mecab_manager.cpp b/mozuku-lsp/src/mecab_manager.cpp
index b042182..04fd796 100644
--- a/mozuku-lsp/src/mecab_manager.cpp
+++ b/mozuku-lsp/src/mecab_manager.cpp
@@ -1,6 +1,6 @@
 #include "mecab_manager.hpp"
+#include "mozuku/core/debug.hpp"
 #include <cabocha.h>
-#include <cstdlib>
 #include <fstream>
 #include <iostream>
 #include <mecab.h>
@@ -14,22 +14,12 @@
 namespace MoZuku {
 namespace mecab {
 
-static bool isDebugEnabled() {
-  static bool initialized = false;
-  static bool debug = false;
-  if (!initialized) {
-    debug = (std::getenv("MOZUKU_DEBUG") != nullptr);
-    initialized = true;
-  }
-  return debug;
-}
-
 MeCabManager::MeCabManager(bool enableCaboCha)
     : mecab_tagger_(nullptr), cabocha_parser_(nullptr),
       system_charset_("UTF-8"), cabocha_available_(false),
       enable_cabocha_(enableCaboCha) {
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] MeCabManager created with CaboCha "
               << (enableCaboCha ? "enabled" : "disabled") << std::endl;
   }
@@ -50,7 +40,7 @@ bool MeCabManager::initialize(const std::string &mecabDicPath,
                               const std::string &mecabCharset) {
   SystemLibInfo systemMeCab = detectSystemMeCab();
   if (!systemMeCab.isAvailable) {
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[ERROR] System MeCab not detected" << std::endl;
     }
     return false;
@@ -67,13 +57,13 @@ bool MeCabManager::initialize(const std::string &mecabDicPath,
     mecab_args = "-d " + mecabDicPath;
   } else if (!systemMeCab.dicPath.empty()) {
     mecab_args = "-d " + systemMeCab.dicPath + "/ipadic";
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] Using detected MeCab dicdir: "
                 << systemMeCab.dicPath << "/ipadic" << std::endl;
     }
   }
 
-  if (isDebugEnabled() && !mecab_args.empty()) {
+  if (debug::isEnabled() && !mecab_args.empty()) {
     std::cerr << "[DEBUG] MeCab args: " << mecab_args << std::endl;
   }
 
@@ -81,13 +71,13 @@ bool MeCabManager::initialize(const std::string &mecabDicPath,
   if (!mecab_tagger_) {
     std::string error = MeCab::getTaggerError() ? MeCab::getTaggerError()
                                                 : "Unknown MeCab error";
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[ERROR] MeCab initialization failed with args '"
                 << mecab_args << "': " << error << std::endl;
     }
 
     if (!mecab_args.empty()) {
-      if (isDebugEnabled()) {
+      if (debug::isEnabled()) {
         std::cerr << "[DEBUG] Trying MeCab without explicit dictionary path..."
                   << std::endl;
       }
@@ -95,7 +85,7 @@ bool MeCabManager::initialize(const std::string &mecabDicPath,
       if (!mecab_tagger_) {
         error = MeCab::getTaggerError() ? MeCab::getTaggerError()
                                         : "Unknown MeCab error";
-        if (isDebugEnabled()) {
+        if (debug::isEnabled()) {
           std::cerr << "[ERROR] MeCab fallback initialization also failed: "
                     << error << std::endl;
         }
@@ -108,7 +98,7 @@ bool MeCabManager::initialize(const std::string &mecabDicPath,
 
   system_charset_ = testMeCabCharset(mecab_tagger_, system_charset_);
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] MeCab successfully initialized with charset: "
               << system_charset_ << std::endl;
   }
@@ -119,22 +109,22 @@ bool MeCabManager::initialize(const std::string &mecabDicPath,
       cabocha_parser_ = cabocha_new2("");
       if (cabocha_parser_) {
         cabocha_available_ = true;
-        if (isDebugEnabled()) {
+        if (debug::isEnabled()) {
           std::cerr << "[DEBUG] CaboCha successfully initialized" << std::endl;
         }
       } else {
         const char *error = cabocha_strerror(nullptr);
-        if (isDebugEnabled()) {
+        if (debug::isEnabled()) {
           std::cerr << "[DEBUG] CaboCha initialization failed: "
                     << (error ? error : "Unknown error") << std::endl;
         }
       }
-    } else if (isDebugEnabled()) {
+    } else if (debug::isEnabled()) {
       std::cerr << "[DEBUG] CaboCha not available on system" << std::endl;
     }
   }
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] MeCabManager initialized - MeCab: "
               << (mecab_tagger_ ? "OK" : "FAIL")
               << ", CaboCha: " << (cabocha_available_ ? "OK" : "N/A")
@@ -147,7 +137,7 @@ bool MeCabManager::initialize(const std::string &mecabDicPath,
 SystemLibInfo MeCabManager::detectSystemMeCab() {
   SystemLibInfo info;
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Detecting system MeCab installation..." << std::endl;
   }
 
@@ -164,7 +154,7 @@ SystemLibInfo MeCabManager::detectSystemMeCab() {
       }
       info.dicPath = dicdir;
 
-      if (isDebugEnabled()) {
+      if (debug::isEnabled()) {
         std::cerr << "[DEBUG] mecab-config --dicdir: " << dicdir << std::endl;
       }
     }
@@ -187,7 +177,7 @@ SystemLibInfo MeCabManager::detectSystemMeCab() {
             charset.erase(charset.find_last_not_of(" \t") + 1);
             info.charset = charset;
 
-            if (isDebugEnabled()) {
+            if (debug::isEnabled()) {
               std::cerr << "[DEBUG] Found charset in dicrc: " << charset
                         << std::endl;
             }
@@ -200,12 +190,12 @@ SystemLibInfo MeCabManager::detectSystemMeCab() {
 
   if (info.charset.empty()) {
     info.charset = "UTF-8";
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] Using default charset: UTF-8" << std::endl;
     }
   } else if (info.charset != "UTF-8") {
     // Test if MeCab actually works with UTF-8 despite dicrc settings
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] dicrc says charset: " << info.charset
                 << ", testing actual behavior..." << std::endl;
     }
@@ -228,7 +218,7 @@ SystemLibInfo MeCabManager::detectSystemMeCab() {
         if (surface == testUtf8 &&
             surface.size() == 6) { // "誤解" is 6 bytes in UTF-8
           utf8Works = true;
-          if (isDebugEnabled()) {
+          if (debug::isEnabled()) {
             std::cerr << "[DEBUG] MeCab actually works with UTF-8 input, "
                          "overriding dicrc charset from "
                       << info.charset << " to UTF-8" << std::endl;
@@ -246,7 +236,7 @@ SystemLibInfo MeCabManager::detectSystemMeCab() {
 
   info.isAvailable = !info.dicPath.empty();
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] System MeCab detection result - Available: "
               << (info.isAvailable ? "yes" : "no")
               << ", DicPath: " << info.dicPath << ", Charset: " << info.charset
@@ -259,7 +249,7 @@ SystemLibInfo MeCabManager::detectSystemMeCab() {
 SystemLibInfo MeCabManager::detectSystemCaboCha() {
   SystemLibInfo info;
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] Detecting system CaboCha installation..."
               << std::endl;
   }
@@ -271,7 +261,7 @@ SystemLibInfo MeCabManager::detectSystemCaboCha() {
     char buffer[256];
     if (fgets(buffer, sizeof(buffer), pipe)) {
       info.isAvailable = true;
-      if (isDebugEnabled()) {
+      if (debug::isEnabled()) {
         std::cerr << "[DEBUG] cabocha-config found, system CaboCha available"
                   << std::endl;
       }
@@ -282,7 +272,7 @@ SystemLibInfo MeCabManager::detectSystemCaboCha() {
   SystemLibInfo mecabInfo = detectSystemMeCab();
   info.charset = mecabInfo.charset;
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] System CaboCha detection result - Available: "
               << (info.isAvailable ? "yes" : "no")
               << ", Charset: " << info.charset << std::endl;
@@ -310,7 +300,7 @@ std::string MeCabManager::testMeCabCharset(MeCab::Tagger *tagger,
 
     // If we get back the same UTF-8 text, MeCab is working in UTF-8 mode
     if (surface == testUtf8 && surface.size() == 6) {
-      if (isDebugEnabled()) {
+      if (debug::isEnabled()) {
         std::cerr << "[DEBUG] MeCab accepts UTF-8 input directly, using UTF-8"
                   << std::endl;
       }
@@ -318,7 +308,7 @@ std::string MeCabManager::testMeCabCharset(MeCab::Tagger *tagger,
     }
   }
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] MeCab requires " << originalCharset << " encoding"
               << std::endl;
   }
diff --git a/mozuku-lsp/src/pos_analyzer.cpp b/mozuku-lsp/src/pos_analyzer.cpp
index 76200b4..3e7e00d 100644
--- a/mozuku-lsp/src/pos_analyzer.cpp
+++ b/mozuku-lsp/src/pos_analyzer.cpp
@@ -9,9 +9,8 @@ std::string POSAnalyzer::mapPosToType(const char *feature) {
   if (!feature)
     return "unknown";
 
-  std::string f = text::TextProcessor::sanitizeUTF8(std::string(feature));
-  auto p = f.find(',');
-  std::string pos = (p == std::string::npos) ? f : f.substr(0, p);
+  std::vector<std::string> fields = parseFeatureFields(std::string(feature));
+  std::string pos = fields.empty() ? "" : fields.front();
 
   if (pos.find("名詞") != std::string::npos)
     return "noun";
@@ -39,6 +38,38 @@ std::string POSAnalyzer::mapPosToType(const char *feature) {
   return "unknown";
 }
 
+// True when the main POS (first feature field) is exactly "名詞" (noun).
+bool POSAnalyzer::isNounFeature(const std::string &feature) {
+  const auto fields = parseFeatureFields(feature);
+  return !fields.empty() && fields.front() == "名詞";
+}
+
+// True when the main POS (first feature field) is exactly "助詞" (particle).
+bool POSAnalyzer::isParticleFeature(const std::string &feature) {
+  const auto fields = parseFeatureFields(feature);
+  return !fields.empty() && fields.front() == "助詞";
+}
+
+// True when the main POS (first feature field) is exactly "接続詞".
+bool POSAnalyzer::isConjunctionFeature(const std::string &feature) {
+  const auto fields = parseFeatureFields(feature);
+  return !fields.empty() && fields.front() == "接続詞";
+}
+
+// Matches "が" as a 接続助詞: fields 0, 1 and 6 must all be present and equal.
+bool POSAnalyzer::isAdversativeGaFeature(const std::string &feature) {
+  const auto f = parseFeatureFields(feature);
+  return f.size() > 6 && f[0] == "助詞" && f[1] == "接続助詞" && f[6] == "が";
+}
+
+// Key "<field0>,<field1>"; only field0 when it stands alone, "" without fields.
+std::string POSAnalyzer::particleKey(const std::string &feature) {
+  const auto parts = parseFeatureFields(feature);
+  if (parts.size() < 2)
+    return parts.empty() ? "" : parts.front();
+  return parts[0] + "," + parts[1];
+}
+
 void POSAnalyzer::parseFeatureDetails(const char *feature,
                                       std::string &baseForm,
                                       std::string &reading,
@@ -75,12 +106,12 @@ DetailedPOS POSAnalyzer::parseDetailedPOS(const char *feature,
   if (!feature)
     return pos;
 
-  std::string f =
+  std::string featureText =
       (systemCharset == "UTF-8")
-          ? std::string(feature)
+          ? text::TextProcessor::sanitizeUTF8(std::string(feature))
           : encoding::systemToUtf8(std::string(feature), systemCharset);
 
-  std::vector<std::string> fields = splitFeature(f);
+  std::vector<std::string> fields = splitFeature(featureText);
 
   // Fill in the detailed POS structure
   if (fields.size() > 0)
@@ -134,6 +165,14 @@ unsigned POSAnalyzer::computeModifiers(const std::string &text, size_t start,
   return mods;
 }
 
+// Shared entry point for the feature helpers: sanitizes `feature` with
+// TextProcessor::sanitizeUTF8 and hands the result to splitFeature.
+std::vector<std::string>
+POSAnalyzer::parseFeatureFields(const std::string &feature) {
+  const std::string sanitized = text::TextProcessor::sanitizeUTF8(feature);
+  return splitFeature(sanitized);
+}
+
 std::vector<std::string> POSAnalyzer::splitFeature(const std::string &feature) {
   std::vector<std::string> fields;
   size_t pos = 0;
diff --git a/mozuku-lsp/src/presenter.cpp b/mozuku-lsp/src/presenter.cpp
new file mode 100644
index 0000000..9f2a3e4
--- /dev/null
+++ b/mozuku-lsp/src/presenter.cpp
@@ -0,0 +1,159 @@
+#include "mozuku/lsp/presenter.hpp"
+
+#include "utf16.hpp"
+
+#include <algorithm>
+#include <iterator>
+
+namespace {
+
+// Serializes two positions as {"start": {line, character}, "end": {...}}.
+nlohmann::json makeRangeJson(const Position &start, const Position &end) {
+  nlohmann::json range;
+  range["start"] = {{"line", start.line}, {"character", start.character}};
+  range["end"] = {{"line", end.line}, {"character", end.character}};
+  return range;
+}
+
+// Range of one token: both ends use token.line, from startChar to endChar.
+nlohmann::json makeTokenRangeJson(const TokenData &token) {
+  const Position first{token.line, token.startChar};
+  const Position last{token.line, token.endChar};
+  return makeRangeJson(first, last);
+}
+
+} // namespace
+
+namespace MoZuku::lsp {
+
+// Returns {"uri", "diagnostics"}; each diagnostic contributes its range,
+// severity and message.
+Presenter::json Presenter::publishDiagnosticsParams(
+    const std::string &uri, const std::vector<Diagnostic> &diags) const {
+  json entries = json::array();
+  for (const Diagnostic &item : diags) {
+    json entry;
+    entry["range"] = makeRangeJson(item.range.start, item.range.end);
+    entry["severity"] = item.severity;
+    entry["message"] = item.message;
+    entries.push_back(entry);
+  }
+
+  json params;
+  params["uri"] = uri;
+  params["diagnostics"] = entries;
+  return params;
+}
+
+// Returns {"uri", "ranges"}; every comment segment's [startByte, endByte]
+// span is mapped to a line/character range within `text`.
+Presenter::json Presenter::commentHighlightsParams(
+    const std::string &uri, const std::string &text,
+    const std::vector<comments::CommentSegment> &segments) const {
+  const TextOffsetMapper mapper(text);
+  json ranges = json::array();
+
+  for (const auto &segment : segments) {
+    const Position from = mapper.byteOffsetToPosition(segment.startByte);
+    const Position to = mapper.byteOffsetToPosition(segment.endByte);
+    ranges.push_back(makeRangeJson(from, to));
+  }
+
+  json params;
+  params["uri"] = uri;
+  params["ranges"] = ranges;
+  return params;
+}
+
+// Same payload shape as commentHighlightsParams, built from plain byte ranges.
+Presenter::json
+Presenter::contentHighlightsParams(const std::string &uri,
+                                   const std::string &text,
+                                   const std::vector<ByteRange> &ranges) const {
+  const TextOffsetMapper mapper(text);
+  json converted = json::array();
+
+  for (const ByteRange &span : ranges) {
+    const Position from = mapper.byteOffsetToPosition(span.startByte);
+    const Position to = mapper.byteOffsetToPosition(span.endByte);
+    converted.push_back(makeRangeJson(from, to));
+  }
+
+  json params;
+  params["uri"] = uri;
+  params["ranges"] = converted;
+  return params;
+}
+
+// Returns {"uri", "tokens"}. When isJapanese is set the token list is always
+// empty; otherwise each token is reported with range, type and modifiers.
+Presenter::json Presenter::semanticHighlightsParams(
+    const std::string &uri, bool isJapanese,
+    const std::vector<TokenData> &tokens) const {
+  json entries = json::array();
+  if (!isJapanese) {
+    for (const TokenData &token : tokens) {
+      json entry;
+      entry["range"] = makeTokenRangeJson(token);
+      entry["type"] = token.tokenType;
+      entry["modifiers"] = token.tokenModifiers;
+      entries.push_back(entry);
+    }
+  }
+
+  json params;
+  params["uri"] = uri;
+  params["tokens"] = entries;
+  return params;
+}
+
+// Flattens tokens into five values each: line delta, start delta (absolute
+// start on a new line), length, index of tokenType in `tokenTypes` (0 when
+// not found) and tokenModifiers. Deltas are relative to the previous token.
+Presenter::json Presenter::semanticTokensData(
+    const std::vector<TokenData> &tokens,
+    const std::vector<std::string> &tokenTypes) const {
+  json encoded = json::array();
+  int lastLine = 0;
+  int lastStart = 0;
+
+  for (const TokenData &token : tokens) {
+    const int lineDelta = token.line - lastLine;
+    const int startDelta =
+        (lineDelta != 0) ? token.startChar : token.startChar - lastStart;
+
+    int typeIndex = 0;
+    const auto match =
+        std::find(tokenTypes.begin(), tokenTypes.end(), token.tokenType);
+    if (match != tokenTypes.end()) {
+      typeIndex = static_cast<int>(std::distance(tokenTypes.begin(), match));
+    }
+
+    encoded.push_back(lineDelta);
+    encoded.push_back(startDelta);
+    encoded.push_back(token.endChar - token.startChar);
+    encoded.push_back(typeIndex);
+    encoded.push_back(token.tokenModifiers);
+
+    lastLine = token.line;
+    lastStart = token.startChar;
+  }
+
+  return encoded;
+}
+
+// Returns {"contents": {kind: "markdown", value: markdown}, "range"} where
+// the range is the token's own span.
+Presenter::json Presenter::hoverResult(const TokenData &token,
+                                       const std::string &markdown) const {
+  json contents;
+  contents["kind"] = "markdown";
+  contents["value"] = markdown;
+
+  json result;
+  result["contents"] = contents;
+  result["range"] = makeTokenRangeJson(token);
+  return result;
+}
+
+} // namespace MoZuku::lsp
diff --git a/mozuku-lsp/src/text_processor.cpp b/mozuku-lsp/src/text_processor.cpp
index 2fa03b9..06cbcbc 100644
--- a/mozuku-lsp/src/text_processor.cpp
+++ b/mozuku-lsp/src/text_processor.cpp
@@ -1,85 +1,28 @@
 #include "text_processor.hpp"
+#include "encoding_utils.hpp"
+#include "mozuku/core/debug.hpp"
 #include <algorithm>
-#include <cstdlib>
 #include <iostream>
+#include <vector>
+#include <cstdint>
 
 namespace MoZuku {
 namespace text {
 
-static bool isDebugEnabled() {
-  static bool initialized = false;
-  static bool debug = false;
-  if (!initialized) {
-    debug = (std::getenv("MOZUKU_DEBUG") != nullptr);
-    initialized = true;
-  }
-  return debug;
-}
-
 std::string TextProcessor::sanitizeUTF8(const std::string &input) {
-  if (input.empty())
-    return input;
-
-  std::string result;
-  result.reserve(input.size());
-
-  for (size_t i = 0; i < input.size(); ++i) {
-    unsigned char c = static_cast<unsigned char>(input[i]);
-
-    // ASCII characters (0x00-0x7F) are safe
-    if (c < 0x80) {
-      // Skip control characters except tab, newline, carriage return
-      if (c >= 0x20 || c == 0x09 || c == 0x0A || c == 0x0D) {
-        result += static_cast<char>(c);
-      }
-      continue;
-    }
-
-    // Handle multi-byte UTF-8 sequences
-    size_t seqLen = 0;
-    if ((c & 0xE0) == 0xC0)
-      seqLen = 2; // 110xxxxx (2-byte)
-    else if ((c & 0xF0) == 0xE0)
-      seqLen = 3; // 1110xxxx (3-byte)
-    else if ((c & 0xF8) == 0xF0)
-      seqLen = 4; // 11110xxx (4-byte)
-    else {
-      // Invalid UTF-8 start byte, skip it
-      continue;
-    }
-
-    // Check if we have enough bytes for the sequence
-    if (i + seqLen > input.size()) {
-      break; // Incomplete sequence at end of string
-    }
-
-    // Validate all continuation bytes
-    if (isValidUtf8Sequence(input, i, seqLen)) {
-      // Valid sequence, copy it
-      for (size_t j = 0; j < seqLen; ++j) {
-        result += input[i + j];
-      }
-      i += seqLen - 1; // -1 because loop will increment i
-    } else {
-      // Invalid sequence, skip start byte (continuation bytes will be handled
-      // in next iterations)
-      continue;
-    }
-  }
-
-  return result;
+  return encoding::sanitizeUtf8(input);
 }
 
 std::vector<SentenceBoundary>
 TextProcessor::splitIntoSentences(const std::string &text) {
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] splitIntoSentences called with text length: "
               << text.size() << std::endl;
   }
 
   std::vector<SentenceBoundary> sentences;
   if (text.empty()) {
-    if (isDebugEnabled()) {
+    if (debug::isEnabled()) {
       std::cerr << "[DEBUG] Empty text, returning empty sentences" << std::endl;
     }
     return sentences;
@@ -158,7 +101,7 @@ TextProcessor::splitIntoSentences(const std::string &text) {
         sentence.text = sentence.text.substr(textStart, textEnd - textStart);
         sentences.push_back(sentence);
 
-        if (isDebugEnabled()) {
+        if (debug::isEnabled()) {
           std::cerr << "[DEBUG] Created sentence " << sentenceId - 1
                     << ": length=" << sentence.text.size()
                     << ", start=" << sentence.start << ", end=" << sentence.end
@@ -178,7 +121,7 @@ TextProcessor::splitIntoSentences(const std::string &text) {
     }
   }
 
-  if (isDebugEnabled()) {
+  if (debug::isEnabled()) {
     std::cerr << "[DEBUG] splitIntoSentences completed: created "
               << sentences.size() << " sentences" << std::endl;
   }
diff --git a/mozuku-lsp/src/tree_sitter_document.cpp b/mozuku-lsp/src/tree_sitter_document.cpp
new file mode 100644
index 0000000..00187b3
--- /dev/null
+++ b/mozuku-lsp/src/tree_sitter_document.cpp
@@ -0,0 +1,125 @@
+#include "mozuku/treesitter/document.hpp"
+
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+// Grammar entry points with C linkage; this file only declares them, so the
+// definitions have to be supplied elsewhere at link time.
+extern "C" {
+const TSLanguage *tree_sitter_c();
+const TSLanguage *tree_sitter_cpp();
+const TSLanguage *tree_sitter_html();
+const TSLanguage *tree_sitter_javascript();
+const TSLanguage *tree_sitter_python();
+const TSLanguage *tree_sitter_rust();
+const TSLanguage *tree_sitter_typescript();
+const TSLanguage *tree_sitter_tsx();
+const TSLanguage *tree_sitter_latex();
+}
+
+namespace {
+
+using LanguageFactory = const TSLanguage *(*)();
+
+// Lower-case language id -> grammar factory. Several ids share a grammar:
+// "c++" reuses the C++ one and both React ids map to TSX.
+const std::unordered_map<std::string, LanguageFactory> &languageMap() {
+  static const std::unordered_map<std::string, LanguageFactory> table = {
+      {"c", tree_sitter_c},
+      {"cpp", tree_sitter_cpp},
+      {"c++", tree_sitter_cpp},
+      {"html", tree_sitter_html},
+      {"javascript", tree_sitter_javascript},
+      {"javascriptreact", tree_sitter_tsx},
+      {"typescript", tree_sitter_typescript},
+      {"typescriptreact", tree_sitter_tsx},
+      {"tsx", tree_sitter_tsx},
+      {"python", tree_sitter_python},
+      {"rust", tree_sitter_rust},
+      {"latex", tree_sitter_latex}};
+  return table;
+}
+
+// Lower-cases every byte of `input` with std::tolower.
+std::string toLower(std::string input) {
+  for (char &ch : input) {
+    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
+  }
+  return input;
+}
+
+// unique_ptr deleter that releases a TSParser through ts_parser_delete.
+struct ParserDeleter {
+  void operator()(TSParser *parser) const {
+    if (parser != nullptr) {
+      ts_parser_delete(parser);
+    }
+  }
+};
+
+} // namespace
+
+namespace MoZuku::treesitter {
+
+// Looks up the grammar for `languageId` (case-insensitive); nullptr when the
+// id is not in the table.
+const TSLanguage *resolveLanguage(const std::string &languageId) {
+  const auto &table = languageMap();
+  const auto entry = table.find(toLower(languageId));
+  return entry == table.end() ? nullptr : entry->second();
+}
+
+// True when `languageId` (case-insensitive) has a registered grammar.
+bool isLanguageSupported(const std::string &languageId) {
+  return languageMap().count(toLower(languageId)) != 0;
+}
+
+// Creates an empty, invalid document (no tree).
+ParsedDocument::ParsedDocument() : tree_(nullptr, &ts_tree_delete) {}
+
+// Parses `text` with the grammar registered for `languageId`; unknown ids
+// resolve to a null grammar and leave the document invalid.
+ParsedDocument::ParsedDocument(const std::string &languageId,
+                               const std::string &text)
+    : ParsedDocument(resolveLanguage(languageId), text) {}
+
+// Parses `text` with `language`. The document stays invalid when the grammar
+// is null, the parser cannot be created or configured, the text does not fit
+// the parser's 32-bit length, or parsing returns no tree.
+ParsedDocument::ParsedDocument(const TSLanguage *language,
+                               const std::string &text)
+    : tree_(nullptr, &ts_tree_delete) {
+  if (language == nullptr) {
+    return;
+  }
+
+  // ts_parser_parse_string takes the length as uint32_t; reject longer input
+  // instead of letting the size_t silently truncate to a prefix.
+  if (text.size() > std::numeric_limits<std::uint32_t>::max()) {
+    return;
+  }
+
+  const std::unique_ptr<TSParser, ParserDeleter> parser(ts_parser_new());
+  if (!parser || !ts_parser_set_language(parser.get(), language)) {
+    return;
+  }
+
+  tree_.reset(ts_parser_parse_string(
+      parser.get(), nullptr, text.c_str(),
+      static_cast<std::uint32_t>(text.size())));
+}
+
+// Reports whether a syntax tree is held.
+bool ParsedDocument::isValid() const { return tree_ != nullptr; }
+
+// Root node of the held tree, or a value-initialized TSNode when invalid.
+TSNode ParsedDocument::root() const {
+  return tree_ ? ts_tree_root_node(tree_.get()) : TSNode{};
+}
+
+} // namespace MoZuku::treesitter
diff --git a/mozuku-lsp/src/utf16.cpp b/mozuku-lsp/src/utf16.cpp
index def910b..a85f84f 100644
--- a/mozuku-lsp/src/utf16.cpp
+++ b/mozuku-lsp/src/utf16.cpp
@@ -1,139 +1,162 @@
 #include "utf16.hpp"
 
+#include "encoding_utils.hpp"
+
 namespace {
-static inline int utf8SeqLen(unsigned char c) {
-  if (c < 0x80)
+// Byte length to consume at `offset`: 0 past the end, 1 if malformed.
+size_t validatedSequenceLength(const std::string &text, size_t offset) {
+  if (offset >= text.size()) {
+    return 0;
+  }
+  // Length 0 (presumably an invalid lead byte) or truncation: take 1 byte.
+  size_t seqLen = MoZuku::encoding::utf8SequenceLength(
+      static_cast<unsigned char>(text[offset]));
+  if (seqLen == 0 || offset + seqLen > text.size()) {
     return 1;
-  if (c < 0xE0)
-    return 2;
-  if (c < 0xF0)
-    return 3;
-  return 4;
+  }
+  // Each trailing byte must match 10xxxxxx, otherwise take 1 byte.
+  for (size_t i = 1; i < seqLen; ++i) {
+    unsigned char c = static_cast<unsigned char>(text[offset + i]);
+    if ((c & 0xC0) != 0x80) {
+      return 1;
+    }
+  }
+
+  return seqLen;
 }
 
-static inline unsigned int decodeCodePoint(const std::string &s, size_t &i) {
-  unsigned char c = static_cast<unsigned char>(s[i]);
-  if (c < 0x80) {
-    return s[i++];
+unsigned int decodeCodePoint(const std::string &text, size_t offset,
+                             size_t seqLen) { // no bounds checks here
+  unsigned char c = static_cast<unsigned char>(text[offset]); // lead byte
+  if (seqLen == 1) { // single byte: returned as-is
+    return c;
   }
-  if ((c >> 5) == 0x6) {
-    unsigned int cp =
-        ((c & 0x1F) << 6) | (static_cast<unsigned char>(s[i + 1]) & 0x3F);
-    i += 2;
-    return cp;
+  if (seqLen == 2) { // 5 payload bits from the lead byte, then 6
+    return ((c & 0x1F) << 6) |
+           (static_cast<unsigned char>(text[offset + 1]) & 0x3F);
   }
-  if ((c >> 4) == 0xE) {
-    unsigned int cp = ((c & 0x0F) << 12) |
-                      ((static_cast<unsigned char>(s[i + 1]) & 0x3F) << 6) |
-                      (static_cast<unsigned char>(s[i + 2]) & 0x3F);
-    i += 3;
-    return cp;
+  if (seqLen == 3) { // 4 payload bits from the lead byte, then 6 + 6
+    return ((c & 0x0F) << 12) |
+           ((static_cast<unsigned char>(text[offset + 1]) & 0x3F) << 6) |
+           (static_cast<unsigned char>(text[offset + 2]) & 0x3F);
+  }
+  return ((c & 0x07) << 18) | // any other seqLen: decoded as the 4-byte form
+         ((static_cast<unsigned char>(text[offset + 1]) & 0x3F) << 12) |
+         ((static_cast<unsigned char>(text[offset + 2]) & 0x3F) << 6) |
+         (static_cast<unsigned char>(text[offset + 3]) & 0x3F);
+}
+// UTF-16 units for the sequence at `offset`: 2 only for code points > U+FFFF.
+int utf16UnitsAt(const std::string &text, size_t offset, size_t seqLen) {
+  if (seqLen < 4) {
+    return 1;
   }
-  unsigned int cp = ((c & 0x07) << 18) |
-                    ((static_cast<unsigned char>(s[i + 1]) & 0x3F) << 12) |
-                    ((static_cast<unsigned char>(s[i + 2]) & 0x3F) << 6) |
-                    (static_cast<unsigned char>(s[i + 3]) & 0x3F);
-  i += 4;
-  return cp;
+
+  unsigned int cp = decodeCodePoint(text, offset, seqLen);
+  return cp <= 0xFFFF ? 1 : 2;
 }
+
 } // namespace
 
+TextOffsetMapper::TextOffsetMapper(const std::string &text)
+    : text_(text), line_starts_(computeLineStarts(text)) {} // index lines once
+// Byte offset of the first byte of each line, as computed at construction.
+const std::vector<size_t> &TextOffsetMapper::lineStarts() const {
+  return line_starts_;
+}
+// Maps a byte offset in the text to a Position via the free function.
+Position TextOffsetMapper::byteOffsetToPosition(size_t offset) const {
+  return ::byteOffsetToPosition(text_, line_starts_, offset);
+}
+// Maps a line / UTF-16 column pair to a byte offset in the text.
+size_t TextOffsetMapper::positionToByteOffset(int line, int character) const {
+  return ::positionToByteOffset(text_, line_starts_, line, character);
+}
+// Position overload of positionToByteOffset(line, character).
+size_t TextOffsetMapper::positionToByteOffset(const Position &position) const {
+  return positionToByteOffset(position.line, position.character);
+}
+// Byte offset at which `token` starts (its line and startChar).
+size_t TextOffsetMapper::tokenStartByteOffset(const TokenData &token) const {
+  return positionToByteOffset(token.line, token.startChar);
+}
+
 std::vector<size_t> computeLineStarts(const std::string &text) {
   std::vector<size_t> lineStarts;
   lineStarts.reserve(64);
   lineStarts.push_back(0);
-  for (size_t i = 0; i < text.size(); ++i)
-    if (text[i] == '\n')
+  for (size_t i = 0; i < text.size(); ++i) {
+    if (text[i] == '\n') {
       lineStarts.push_back(i + 1);
+    }
+  }
   return lineStarts;
 }
 
 Position byteOffsetToPosition(const std::string &text,
                               const std::vector<size_t> &lineStarts,
                               size_t offset) {
-  // オフセットをテキストサイズに制限
-  if (offset > text.size())
+  if (offset > text.size()) { // clamp the offset to the end of the text
     offset = text.size();
+  }
 
-  // オフセット以下の最後の開始位置を二分探索で検索
-  size_t lo = 0, hi = lineStarts.size();
+  size_t lo = 0; // binary search for the last line start <= offset
+  size_t hi = lineStarts.size();
   while (lo + 1 < hi) {
     size_t mid = (lo + hi) / 2;
-    if (lineStarts[mid] <= offset)
+    if (lineStarts[mid] <= offset) {
       lo = mid;
-    else
+    } else {
       hi = mid;
+    }
+  }
+  // Count UTF-16 code units from the start of line `lo` up to `offset`.
+  size_t bytePos = lineStarts[lo];
+  int utf16Pos = 0;
+
+  while (bytePos < offset && bytePos < text.size() && text[bytePos] != '\n') {
+    size_t seqLen = validatedSequenceLength(text, bytePos);
+    utf16Pos += utf16UnitsAt(text, bytePos, seqLen);
+    bytePos += seqLen;
   }
 
-  size_t lineStart = lineStarts[lo];
+  return Position{static_cast<int>(lo), utf16Pos};
+}
 
-  // 行開始からオフセットまでのUTF-16コードユニット数をカウント
-  size_t i = lineStart;
-  unsigned int col16 = 0;
+size_t positionToByteOffset(const std::string &text,
+                            const std::vector<size_t> &lineStarts, int line,
+                            int character) { // character: UTF-16 column
+  if (line < 0 || lineStarts.empty()) {
+    return 0; // negative line or empty table: start of the text
+  }
+  if (line >= static_cast<int>(lineStarts.size())) {
+    return text.size(); // line past the last one: end of the text
+  }
+  // Walk the line until `character` UTF-16 units (or the line end) pass.
-  while (i < offset && i < text.size() && text[i] != '\n') {
-    unsigned char c = static_cast<unsigned char>(text[i]);
+  size_t bytePos = lineStarts[line];
+  int utf16Pos = 0;
 
-    // 効率性と正確性のためASCII文字を直接処理
-    if (c < 0x80) {
-      // ASCII文字 (タブ、スペースを含む) は常に1つのUTF-16コードユニット
-      col16 += 1;
-      i += 1;
-    } else {
-      // マルチバイトUTF-8文字
-      size_t prev = i;
-      unsigned int cp = decodeCodePoint(text, i);
-
-      // UTF-16エンコーディング:
-      // BMP文字は1コードユニット、その他は2コードユニット (サロゲートペア)
-      if (cp <= 0xFFFF) {
-        col16 += 1; // BMP文字: 1 UTF-16コードユニット
-      } else {
-        col16 += 2; // 非BMP文字: 2 UTF-16コードユニット (サロゲートペア)
-      }
-
-      // 無限ループを防ぐ安全性チェック
-      if (i == prev) {
-        i++;     // 無効なバイトをスキップ
-        col16++; // 1コードユニットとしてカウント
-      }
-    }
+  while (bytePos < text.size() && utf16Pos < character &&
+         text[bytePos] != '\n') {
+    size_t seqLen = validatedSequenceLength(text, bytePos);
+    utf16Pos += utf16UnitsAt(text, bytePos, seqLen);
+    bytePos += seqLen;
   }
 
-  return Position{static_cast<int>(lo), static_cast<int>(col16)};
+  return bytePos;
+}
+// Convenience overload: rebuilds the line-start table on every call.
+size_t positionToByteOffset(const std::string &text, int line, int character) {
+  return positionToByteOffset(text, computeLineStarts(text), line, character);
+}
 
 size_t utf8ToUtf16Length(const std::string &utf8Str) {
-  size_t i = 0;
+  size_t offset = 0;
   size_t utf16Length = 0;
 
-  while (i < utf8Str.size()) {
-    unsigned char c = static_cast<unsigned char>(utf8Str[i]);
-
-    // 効率性と正確性のためASCII文字を直接処理
-    if (c < 0x80) {
-      // ASCII文字 (タブ、スペースを含む) は常に1つのUTF-16コードユニット
-      utf16Length += 1;
-      i += 1;
-    } else {
-      // マルチバイトUTF-8文字
-      size_t prev = i;
-      unsigned int cp = decodeCodePoint(utf8Str, i);
-
-      // UTF-16エンコーディング:
-      // BMP文字は1コードユニット、その他は2コードユニット (サロゲートペア)
-      if (cp <= 0xFFFF) {
-        utf16Length += 1; // BMP character
-      } else {
-        utf16Length += 2; // Non-BMP character (surrogate pair)
-      }
-
-      // 無限ループを防ぐ安全性チェック
-      if (i == prev) {
-        i++;           // 無効なバイトをスキップ
-        utf16Length++; // 1コードユニットとしてカウント
-      }
-    }
+  while (offset < utf8Str.size()) {
+    size_t seqLen = validatedSequenceLength(utf8Str, offset);
+    utf16Length += utf16UnitsAt(utf8Str, offset, seqLen);
+    offset += seqLen;
   }
 
   return utf16Length;
diff --git a/vscode-mozuku/src/client.ts b/vscode-mozuku/src/client.ts
index a0babb3..9159a5b 100644
--- a/vscode-mozuku/src/client.ts
+++ b/vscode-mozuku/src/client.ts
@@ -1,6 +1,5 @@
 import * as vscode from "vscode";
 import * as fs from "fs";
-import * as path from "path";
 import {
   LanguageClient,
   LanguageClientOptions,
@@ -8,6 +7,8 @@ import {
   TransportKind,
   State,
 } from "vscode-languageclient/node";
+import { buildDocumentSelector, buildInitializationOptions } from "./config";
+import { resolveServerPath as discoverServerPath } from "./server-discovery";
 
 type CommentHighlightMessage = {
   uri: string;
@@ -37,21 +38,6 @@ type SemanticHighlightMessage = {
   }>;
 };
 
-const supportedLanguages = [
-  "japanese",
-  "c",
-  "cpp",
-  "html",
-  "python",
-  "javascript",
-  "javascriptreact",
-  "typescript",
-  "typescriptreact",
-  "rust",
-  "html",
-  "latex",
-];
-
 export async function startClient(
   ctx: vscode.ExtensionContext,
   serverPath: string,
@@ -60,7 +46,7 @@ export async function startClient(
     process.env.VSCODE_DEBUG_MODE === "true" ||
     ctx.extensionMode === vscode.ExtensionMode.Development;
 
-  const resolved = resolveServerPath(ctx, serverPath);
+  const resolved = discoverServerPath(ctx, serverPath, isDebug);
   console.log("[MoZuku] 最終的に解決されたサーバーパス:", resolved);
 
   if (!fs.existsSync(resolved)) {
@@ -85,87 +71,7 @@ export async function startClient(
     },
   };
 
-  const config = vscode.workspace.getConfiguration("mozuku");
-  const initOptions = {
-    mozuku: {
-      mecab: {
-        dicdir: config.get<string>("mecab.dicdir", ""),
-        charset: config.get<string>("mecab.charset", "UTF-8"),
-      },
-      analysis: {
-        enableCaboCha: config.get<boolean>("analysis.enableCaboCha", true),
-        grammarCheck: config.get<boolean>("analysis.grammarCheck", true),
-        minJapaneseRatio: config.get<number>("analysis.minJapaneseRatio", 0.1),
-        warningMinSeverity: config.get<number>(
-          "analysis.warningMinSeverity",
-          2,
-        ),
-        warnings: {
-          particleDuplicate: config.get<boolean>(
-            "analysis.warnings.particleDuplicate",
-            true,
-          ),
-          particleSequence: config.get<boolean>(
-            "analysis.warnings.particleSequence",
-            true,
-          ),
-          particleMismatch: config.get<boolean>(
-            "analysis.warnings.particleMismatch",
-            true,
-          ),
-          sentenceStructure: config.get<boolean>(
-            "analysis.warnings.sentenceStructure",
-            false,
-          ),
-          styleConsistency: config.get<boolean>(
-            "analysis.warnings.styleConsistency",
-            false,
-          ),
-          redundancy: config.get<boolean>(
-            "analysis.warnings.redundancy",
-            false,
-          ),
-        },
-        rules: {
-          commaLimit: config.get<boolean>("analysis.rules.commaLimit", true),
-          adversativeGa: config.get<boolean>(
-            "analysis.rules.adversativeGa",
-            true,
-          ),
-          duplicateParticleSurface: config.get<boolean>(
-            "analysis.rules.duplicateParticleSurface",
-            true,
-          ),
-          adjacentParticles: config.get<boolean>(
-            "analysis.rules.adjacentParticles",
-            true,
-          ),
-          conjunctionRepeat: config.get<boolean>(
-            "analysis.rules.conjunctionRepeat",
-            true,
-          ),
-          raDropping: config.get<boolean>("analysis.rules.raDropping", true),
-          commaLimitMax: config.get<number>("analysis.rules.commaLimitMax", 3),
-          adversativeGaMax: config.get<number>(
-            "analysis.rules.adversativeGaMax",
-            1,
-          ),
-          duplicateParticleSurfaceMaxRepeat: config.get<number>(
-            "analysis.rules.duplicateParticleSurfaceMaxRepeat",
-            1,
-          ),
-          adjacentParticlesMaxRepeat: config.get<number>(
-            "analysis.rules.adjacentParticlesMaxRepeat",
-            1,
-          ),
-          conjunctionRepeatMax: config.get<number>(
-            "analysis.rules.conjunctionRepeatMax",
-            1,
-          ),
-        },
-      },
-    },
-  };
+  const initOptions = buildInitializationOptions();
 
   if (isDebug) {
     console.log(
@@ -174,14 +80,8 @@ export async function startClient(
     );
   }
 
-  const documentSelector = [
-    ...supportedLanguages.map((language) => ({ language })),
-    { scheme: "file", pattern: "**/*.ja.txt" },
-    { scheme: "file", pattern: "**/*.ja.md" },
-  ];
-
   const clientOptions: LanguageClientOptions = {
-    documentSelector,
+    documentSelector: buildDocumentSelector(),
     synchronize: {
       fileEvents: vscode.workspace.createFileSystemWatcher("**/*"),
     },
@@ -439,212 +339,12 @@ export async function startClient(
   return client;
 }
 
-function resolveServerPath(
+export function resolveServerPath(
   ctx: vscode.ExtensionContext,
   configured: string,
 ): string {
-  const isWindows = process.platform === "win32";
-  const exeName = isWindows ? "mozuku-lsp.exe" : "mozuku-lsp";
   const isDebug =
     process.env.VSCODE_DEBUG_MODE === "true" ||
     ctx.extensionMode === vscode.ExtensionMode.Development;
-  const configuredValue = configured.trim();
-  const envValue = process.env.MOZUKU_LSP?.trim() ?? "";
-  const workspaceRoot = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath;
-  const extensionRoot = ctx.extensionUri.fsPath;
-  const seen = new Set<string>();
-
-  if (isDebug) {
-    console.log("[MoZuku] サーバーパスを解決中:", {
-      configured: configuredValue,
-      extensionPath: extensionRoot,
-      workspaceFolders: vscode.workspace.workspaceFolders?.map(
-        (f) => f.uri.fsPath,
-      ),
-    });
-  }
-
-  const candidates: { type: string; path: string }[] = [];
-  const add = (type: string, p: string | undefined) => {
-    if (!p || p.trim().length === 0) {
-      return;
-    }
-    const normalized = path.normalize(p);
-    if (seen.has(normalized)) {
-      return;
-    }
-    seen.add(normalized);
-    candidates.push({ type, path: p });
-  };
-  const addResolvedPath = (type: string, candidate: string | undefined) => {
-    if (!candidate) {
-      return;
-    }
-    if (path.isAbsolute(candidate)) {
-      add(type, candidate);
-      return;
-    }
-    if (workspaceRoot) {
-      add(`${type} (workspace)`, path.join(workspaceRoot, candidate));
-    }
-    add(`${type} (extension)`, path.join(extensionRoot, candidate));
-    add(`${type} (cwd)`, path.resolve(candidate));
-  };
-  const installDirs = (): string[] => {
-    const dirs: string[] = [];
-    const pathEnv = process.env.PATH || "";
-    for (const dir of pathEnv.split(path.delimiter)) {
-      if (dir) {
-        dirs.push(dir);
-      }
-    }
-
-    const home = process.env.HOME || process.env.USERPROFILE;
-    if (home) {
-      dirs.push(path.join(home, ".local", "bin"));
-      dirs.push(path.join(home, "bin"));
-    }
-
-    if (isWindows) {
-      const localAppData = process.env.LOCALAPPDATA;
-      if (localAppData) {
-        dirs.push(path.join(localAppData, "Programs", "MoZuku", "bin"));
-        dirs.push(path.join(localAppData, "Programs", "mozuku-lsp", "bin"));
-      }
-      for (const base of [
-        process.env.ProgramFiles,
-        process.env["ProgramFiles(x86)"],
-      ]) {
-        if (!base) {
-          continue;
-        }
-        dirs.push(path.join(base, "MoZuku", "bin"));
-        dirs.push(path.join(base, "mozuku-lsp", "bin"));
-      }
-    } else {
-      dirs.push("/usr/local/bin");
-      dirs.push("/usr/bin");
-      if (process.platform === "darwin") {
-        dirs.push("/opt/homebrew/bin");
-        dirs.push("/opt/local/bin");
-      }
-    }
-
-    return dirs;
-  };
-  const addCommandSearch = (type: string, commandName: string | undefined) => {
-    if (!commandName || hasPathSep(commandName)) {
-      return;
-    }
-    const names =
-      isWindows && !commandName.toLowerCase().endsWith(".exe")
-        ? [commandName, `${commandName}.exe`]
-        : [commandName];
-    for (const dir of installDirs()) {
-      for (const name of names) {
-        add(type, path.join(dir, name));
-      }
-    }
-  };
-
-  if (configuredValue && hasPathSep(configuredValue)) {
-    addResolvedPath("設定済み", configuredValue);
-  }
-  if (envValue && hasPathSep(envValue)) {
-    addResolvedPath("環境変数 MOZUKU_LSP", envValue);
-  }
-
-  addCommandSearch(
-    "設定済みコマンド",
-    configuredValue && !hasPathSep(configuredValue)
-      ? configuredValue
-      : undefined,
-  );
-  addCommandSearch(
-    "環境変数 MOZUKU_LSP",
-    envValue && !hasPathSep(envValue) ? envValue : undefined,
-  );
-  addCommandSearch("デフォルトコマンド", exeName);
-
-  add(
-    "パッケージ済み",
-    vscode.Uri.joinPath(ctx.extensionUri, "bin", exeName).fsPath,
-  );
-
-  const plat = process.platform;
-  const arch = process.arch;
-  add(
-    "パッケージ済み",
-    vscode.Uri.joinPath(
-      ctx.extensionUri,
-      "server",
-      "bin",
-      `${plat}-${arch}`,
-      exeName,
-    ).fsPath,
-  );
-
-  if (workspaceRoot) {
-    add(
-      "ワークスペース-install",
-      path.join(workspaceRoot, "build", "install", "bin", exeName),
-    );
-    add("ワークスペース-build", path.join(workspaceRoot, "build", exeName));
-    add(
-      "ワークスペース-install",
-      path.join(
-        workspaceRoot,
-        "mozuku-lsp",
-        "build",
-        "install",
-        "bin",
-        exeName,
-      ),
-    );
-    add(
-      "ワークスペース-build",
-      path.join(workspaceRoot, "mozuku-lsp", "build", exeName),
-    );
-  }
-
-  add(
-    "開発-install",
-    path.join(
-      extensionRoot,
-      "..",
-      "mozuku-lsp",
-      "build",
-      "install",
-      "bin",
-      exeName,
-    ),
-  );
-  add(
-    "開発-build",
-    path.join(extensionRoot, "..", "mozuku-lsp", "build", exeName),
-  );
-
-  for (const candidate of candidates) {
-    if (fs.existsSync(candidate.path)) {
-      if (isDebug) {
-        console.log(`[MoZuku] ${candidate.type}パスを使用:`, candidate.path);
-      }
-      return candidate.path;
-    } else if (isDebug) {
-      console.log(
-        `[MoZuku] ${candidate.type}パスが見つかりません:`,
-        candidate.path,
-      );
-    }
-  }
-
-  const fallback = configuredValue || envValue || exeName;
-  if (isDebug) {
-    console.log("[MoZuku] フォールバックパスを使用:", fallback);
-  }
-  return fallback;
-}
-
-function hasPathSep(p: string): boolean {
-  return p.includes("/") || p.includes("\\");
+  return discoverServerPath(ctx, configured, isDebug);
 }
diff --git a/vscode-mozuku/src/config.ts b/vscode-mozuku/src/config.ts
new file mode 100644
index 0000000..a821a9f
--- /dev/null
+++ b/vscode-mozuku/src/config.ts
@@ -0,0 +1,126 @@
+import * as vscode from "vscode";
+import type { LanguageClientOptions } from "vscode-languageclient/node";
+
+/**
+ * VS Code language IDs the client registers for; each one becomes a
+ * `{ language }` entry in the document selector.
+ */
+const supportedLanguages = [
+  "japanese",
+  "c",
+  "cpp",
+  "html",
+  "python",
+  "javascript",
+  "javascriptreact",
+  "typescript",
+  "typescriptreact",
+  "rust",
+  "latex",
+];
+
+/**
+ * Reads the "mozuku" workspace configuration and packs it into a nested
+ * `{ mozuku: { mecab, analysis } }` object.
+ *
+ * Every setting is read with an explicit fallback value, which is passed as
+ * the second argument of `config.get`.
+ */
+export function buildInitializationOptions() {
+  const config = vscode.workspace.getConfiguration("mozuku");
+
+  return {
+    mozuku: {
+      mecab: {
+        dicdir: config.get<string>("mecab.dicdir", ""),
+        charset: config.get<string>("mecab.charset", "UTF-8"),
+      },
+      analysis: {
+        enableCaboCha: config.get<boolean>("analysis.enableCaboCha", true),
+        grammarCheck: config.get<boolean>("analysis.grammarCheck", true),
+        minJapaneseRatio: config.get<number>("analysis.minJapaneseRatio", 0.1),
+        warningMinSeverity: config.get<number>(
+          "analysis.warningMinSeverity",
+          2,
+        ),
+        warnings: {
+          particleDuplicate: config.get<boolean>(
+            "analysis.warnings.particleDuplicate",
+            true,
+          ),
+          particleSequence: config.get<boolean>(
+            "analysis.warnings.particleSequence",
+            true,
+          ),
+          particleMismatch: config.get<boolean>(
+            "analysis.warnings.particleMismatch",
+            true,
+          ),
+          sentenceStructure: config.get<boolean>(
+            "analysis.warnings.sentenceStructure",
+            false,
+          ),
+          styleConsistency: config.get<boolean>(
+            "analysis.warnings.styleConsistency",
+            false,
+          ),
+          redundancy: config.get<boolean>(
+            "analysis.warnings.redundancy",
+            false,
+          ),
+        },
+        rules: {
+          commaLimit: config.get<boolean>("analysis.rules.commaLimit", true),
+          adversativeGa: config.get<boolean>(
+            "analysis.rules.adversativeGa",
+            true,
+          ),
+          duplicateParticleSurface: config.get<boolean>(
+            "analysis.rules.duplicateParticleSurface",
+            true,
+          ),
+          adjacentParticles: config.get<boolean>(
+            "analysis.rules.adjacentParticles",
+            true,
+          ),
+          conjunctionRepeat: config.get<boolean>(
+            "analysis.rules.conjunctionRepeat",
+            true,
+          ),
+          raDropping: config.get<boolean>("analysis.rules.raDropping", true),
+          commaLimitMax: config.get<number>("analysis.rules.commaLimitMax", 3),
+          adversativeGaMax: config.get<number>(
+            "analysis.rules.adversativeGaMax",
+            1,
+          ),
+          duplicateParticleSurfaceMaxRepeat: config.get<number>(
+            "analysis.rules.duplicateParticleSurfaceMaxRepeat",
+            1,
+          ),
+          adjacentParticlesMaxRepeat: config.get<number>(
+            "analysis.rules.adjacentParticlesMaxRepeat",
+            1,
+          ),
+          conjunctionRepeatMax: config.get<number>(
+            "analysis.rules.conjunctionRepeatMax",
+            1,
+          ),
+        },
+      },
+    },
+  };
+}
+
+/**
+ * Builds the document selector: one `{ language }` entry per supported
+ * language ID, plus file-scheme patterns for `*.ja.txt` and `*.ja.md`.
+ */
+export function buildDocumentSelector(): NonNullable<
+  LanguageClientOptions["documentSelector"]
+> {
+  return [
+    ...supportedLanguages.map((language) => ({ language })),
+    { scheme: "file", pattern: "**/*.ja.txt" },
+    { scheme: "file", pattern: "**/*.ja.md" },
+  ];
+}
diff --git a/vscode-mozuku/src/extension.ts b/vscode-mozuku/src/extension.ts
index a7ed152..8b23f6b 100644
--- a/vscode-mozuku/src/extension.ts
+++ b/vscode-mozuku/src/extension.ts
@@ -1,16 +1,16 @@
-import * as vscode from 'vscode';
-import { startClient } from './client';
+import * as vscode from "vscode";
+import { startClient } from "./client";
 
 export async function activate(context: vscode.ExtensionContext) {
-  console.log('[MoZuku] Extension activation started...');
+  console.log("[MoZuku] Extension activation started...");
 
   const serverPath =
-    vscode.workspace.getConfiguration('mozuku').get<string>('serverPath') ||
-    'mozuku-lsp';
-  console.log('[MoZuku] LSP client starting: server path =', serverPath);
+    vscode.workspace.getConfiguration("mozuku").get<string>("serverPath") ||
+    "mozuku-lsp";
+  console.log("[MoZuku] LSP client starting: server path =", serverPath);
   const client = await startClient(context, serverPath);
 
-  console.log('[MoZuku] Extension activation completed');
+  console.log("[MoZuku] Extension activation completed");
 }
 
-export function deactivate() { }
+export function deactivate() {}
diff --git a/vscode-mozuku/src/server-discovery.ts b/vscode-mozuku/src/server-discovery.ts
new file mode 100644
index 0000000..511a5f2
--- /dev/null
+++ b/vscode-mozuku/src/server-discovery.ts
@@ -0,0 +1,257 @@
+import * as fs from "fs";
+import * as path from "path";
+import * as vscode from "vscode";
+
+/**
+ * Locates the mozuku-lsp server executable.
+ *
+ * Candidates are collected in priority order and the first one that exists as
+ * a regular file is returned:
+ *   1. `configured` / `MOZUKU_LSP` when they contain a path separator
+ *      (relative values are tried against the first workspace folder, the
+ *      extension root and the current working directory);
+ *   2. `configured` / `MOZUKU_LSP` / the default executable name, looked up
+ *      in PATH and a fixed list of install directories;
+ *   3. binaries bundled inside the extension;
+ *   4. build outputs under the first workspace folder;
+ *   5. build outputs under `../mozuku-lsp` relative to the extension root.
+ *
+ * @param ctx Extension context; `extensionUri` anchors the bundled and
+ *   development candidates.
+ * @param configured Configured server path or command name.
+ * @param isDebug When true, each probed candidate is logged to the console.
+ * @returns The first candidate that is an existing file; otherwise the first
+ *   non-empty value among `configured`, `MOZUKU_LSP` and the default
+ *   executable name.
+ */
+export function resolveServerPath(
+  ctx: vscode.ExtensionContext,
+  configured: string,
+  isDebug: boolean,
+): string {
+  const isWindows = process.platform === "win32";
+  const exeName = isWindows ? "mozuku-lsp.exe" : "mozuku-lsp";
+  const configuredValue = configured.trim();
+  const envValue = process.env.MOZUKU_LSP?.trim() ?? "";
+  const workspaceRoot = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath;
+  const extensionRoot = ctx.extensionUri.fsPath;
+  const seen = new Set<string>();
+
+  if (isDebug) {
+    console.log("[MoZuku] サーバーパスを解決中:", {
+      configured: configuredValue,
+      extensionPath: extensionRoot,
+      workspaceFolders: vscode.workspace.workspaceFolders?.map(
+        (f) => f.uri.fsPath,
+      ),
+    });
+  }
+
+  const candidates: { type: string; path: string }[] = [];
+  // Queues a candidate once; duplicates are detected on the normalized path.
+  const add = (type: string, candidatePath: string | undefined) => {
+    if (!candidatePath || candidatePath.trim().length === 0) {
+      return;
+    }
+    const normalized = path.normalize(candidatePath);
+    if (seen.has(normalized)) {
+      return;
+    }
+    seen.add(normalized);
+    candidates.push({ type, path: candidatePath });
+  };
+
+  // Absolute paths are queued as-is; relative ones are tried against the
+  // workspace root, the extension root and the current working directory.
+  const addResolvedPath = (type: string, candidate: string | undefined) => {
+    if (!candidate) {
+      return;
+    }
+    if (path.isAbsolute(candidate)) {
+      add(type, candidate);
+      return;
+    }
+    if (workspaceRoot) {
+      add(`${type} (workspace)`, path.join(workspaceRoot, candidate));
+    }
+    add(`${type} (extension)`, path.join(extensionRoot, candidate));
+    add(`${type} (cwd)`, path.resolve(candidate));
+  };
+
+  // PATH entries first, then per-user and platform-specific install dirs.
+  const installDirs = (): string[] => {
+    const dirs: string[] = [];
+    const pathEnv = process.env.PATH || "";
+    for (const dir of pathEnv.split(path.delimiter)) {
+      if (dir) {
+        dirs.push(dir);
+      }
+    }
+
+    const home = process.env.HOME || process.env.USERPROFILE;
+    if (home) {
+      dirs.push(path.join(home, ".local", "bin"));
+      dirs.push(path.join(home, "bin"));
+    }
+
+    if (isWindows) {
+      const localAppData = process.env.LOCALAPPDATA;
+      if (localAppData) {
+        dirs.push(path.join(localAppData, "Programs", "MoZuku", "bin"));
+        dirs.push(path.join(localAppData, "Programs", "mozuku-lsp", "bin"));
+      }
+      for (const base of [
+        process.env.ProgramFiles,
+        process.env["ProgramFiles(x86)"],
+      ]) {
+        if (!base) {
+          continue;
+        }
+        dirs.push(path.join(base, "MoZuku", "bin"));
+        dirs.push(path.join(base, "mozuku-lsp", "bin"));
+      }
+    } else {
+      dirs.push("/usr/local/bin");
+      dirs.push("/usr/bin");
+      if (process.platform === "darwin") {
+        dirs.push("/opt/homebrew/bin");
+        dirs.push("/opt/local/bin");
+      }
+    }
+
+    return dirs;
+  };
+  // The directory list does not depend on the command, so build it once.
+  const searchDirs = installDirs();
+
+  // Bare command names (no path separator) are looked up in every search
+  // directory; on Windows the name is also tried with ".exe" appended.
+  const addCommandSearch = (type: string, commandName: string | undefined) => {
+    if (!commandName || hasPathSep(commandName)) {
+      return;
+    }
+    const names =
+      isWindows && !commandName.toLowerCase().endsWith(".exe")
+        ? [commandName, `${commandName}.exe`]
+        : [commandName];
+    for (const dir of searchDirs) {
+      for (const name of names) {
+        add(type, path.join(dir, name));
+      }
+    }
+  };
+
+  if (configuredValue && hasPathSep(configuredValue)) {
+    addResolvedPath("設定済み", configuredValue);
+  }
+  if (envValue && hasPathSep(envValue)) {
+    addResolvedPath("環境変数 MOZUKU_LSP", envValue);
+  }
+
+  addCommandSearch(
+    "設定済みコマンド",
+    configuredValue && !hasPathSep(configuredValue)
+      ? configuredValue
+      : undefined,
+  );
+  addCommandSearch(
+    "環境変数 MOZUKU_LSP",
+    envValue && !hasPathSep(envValue) ? envValue : undefined,
+  );
+  addCommandSearch("デフォルトコマンド", exeName);
+
+  add(
+    "パッケージ済み",
+    vscode.Uri.joinPath(ctx.extensionUri, "bin", exeName).fsPath,
+  );
+
+  add(
+    "パッケージ済み",
+    vscode.Uri.joinPath(
+      ctx.extensionUri,
+      "server",
+      "bin",
+      `${process.platform}-${process.arch}`,
+      exeName,
+    ).fsPath,
+  );
+
+  if (workspaceRoot) {
+    add(
+      "ワークスペース-install",
+      path.join(workspaceRoot, "build", "install", "bin", exeName),
+    );
+    add("ワークスペース-build", path.join(workspaceRoot, "build", exeName));
+    add(
+      "ワークスペース-install",
+      path.join(
+        workspaceRoot,
+        "mozuku-lsp",
+        "build",
+        "install",
+        "bin",
+        exeName,
+      ),
+    );
+    add(
+      "ワークスペース-build",
+      path.join(workspaceRoot, "mozuku-lsp", "build", exeName),
+    );
+  }
+
+  add(
+    "開発-install",
+    path.join(
+      extensionRoot,
+      "..",
+      "mozuku-lsp",
+      "build",
+      "install",
+      "bin",
+      exeName,
+    ),
+  );
+  add(
+    "開発-build",
+    path.join(extensionRoot, "..", "mozuku-lsp", "build", exeName),
+  );
+
+  for (const candidate of candidates) {
+    // A directory with the same name must not be mistaken for the server.
+    if (isExistingFile(candidate.path)) {
+      if (isDebug) {
+        console.log(`[MoZuku] ${candidate.type}パスを使用:`, candidate.path);
+      }
+      return candidate.path;
+    }
+    if (isDebug) {
+      console.log(
+        `[MoZuku] ${candidate.type}パスが見つかりません:`,
+        candidate.path,
+      );
+    }
+  }
+
+  const fallback = configuredValue || envValue || exeName;
+  if (isDebug) {
+    console.log("[MoZuku] フォールバックパスを使用:", fallback);
+  }
+  return fallback;
+}
+
+/** Returns true when `candidate` contains a "/" or "\" path separator. */
+function hasPathSep(candidate: string): boolean {
+  return candidate.includes("/") || candidate.includes("\\");
+}
+
+/**
+ * Returns true when `candidate` exists and is a regular file (symlinks are
+ * followed); any stat failure is treated as "not found".
+ */
+function isExistingFile(candidate: string): boolean {
+  try {
+    return fs.statSync(candidate).isFile();
+  } catch {
+    return false;
+  }
+}