From 06843cbeb43a1ca752532ae08321293d9050c821 Mon Sep 17 00:00:00 2001 From: ahogappa Date: Sat, 25 Apr 2026 22:14:13 +0900 Subject: [PATCH 1/2] Make position encoding configurable in Service MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `position_encoding:` option to `TypeProf::Core::Service.new`. FileContext stores the encoding and computes Prism::Location columns accordingly. Default is `Encoding::UTF_16LE` (preserves existing behavior). UTF-8 uses Prism's native `start_column`/`end_column` (= byte offsets), because Prism's `code_units_cache(Encoding::UTF_8)` reports code points rather than bytes — the latter is what the LSP 3.17 spec defines for `utf-8`. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/typeprof/code_range.rb | 12 +++++----- lib/typeprof/core/ast.rb | 8 +++---- lib/typeprof/core/env.rb | 32 ++++++++++++++++++++------ lib/typeprof/core/service.rb | 6 ++--- test/core/position_encoding_test.rb | 35 +++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 19 deletions(-) create mode 100644 test/core/position_encoding_test.rb diff --git a/lib/typeprof/code_range.rb b/lib/typeprof/code_range.rb index b8bfb234f..80b5ed1d5 100644 --- a/lib/typeprof/code_range.rb +++ b/lib/typeprof/code_range.rb @@ -55,12 +55,14 @@ def initialize(first, last) raise unless first end - def self.from_node(node, code_units_cache) + def self.from_node(node, file_context) node = node.location if node.respond_to?(:location) - if node.is_a?(Prism::Location) - pos1 = CodePosition.new(node.start_line, node.cached_start_code_units_column(code_units_cache)) - pos2 = CodePosition.new(node.end_line, node.cached_end_code_units_column(code_units_cache)) - elsif node.is_a?(RBS::Location) + case node + when Prism::Location + start_col, end_col = file_context.column_offsets_for(node) + pos1 = CodePosition.new(node.start_line, start_col) + pos2 = CodePosition.new(node.end_line, end_col) + when RBS::Location pos1 = 
CodePosition.new(*node.start_loc) pos2 = CodePosition.new(*node.end_loc) else diff --git a/lib/typeprof/core/ast.rb b/lib/typeprof/core/ast.rb index 43a43945c..5dbc50858 100644 --- a/lib/typeprof/core/ast.rb +++ b/lib/typeprof/core/ast.rb @@ -1,6 +1,6 @@ module TypeProf::Core class AST - def self.parse_rb(path, src) + def self.parse_rb(path, src, position_encoding) result = Prism.parse(src) return nil unless result.errors.empty? @@ -11,7 +11,7 @@ def self.parse_rb(path, src) raise unless raw_scope.type == :program_node prism_source = result.source - file_context = FileContext.new(path, prism_source, result.comments) + file_context = FileContext.new(path, position_encoding, prism_source, result.comments) cref = CRef::Toplevel lenv = LocalEnv.new(file_context, cref, {}, []) @@ -414,11 +414,11 @@ def self.parse_cpath(raw_node, cref) return cpath + names.reverse if cpath end - def self.parse_rbs(path, src) + def self.parse_rbs(path, src, position_encoding) _buffer, _directives, raw_decls = RBS::Parser.parse_signature(src) cref = CRef::Toplevel - file_context = FileContext.new(path) + file_context = FileContext.new(path, position_encoding) lenv = LocalEnv.new(file_context, cref, {}, []) raw_decls.map do |raw_decl| diff --git a/lib/typeprof/core/env.rb b/lib/typeprof/core/env.rb index 5b40d79dd..f2af8f209 100644 --- a/lib/typeprof/core/env.rb +++ b/lib/typeprof/core/env.rb @@ -259,14 +259,14 @@ def resolve_type_alias(cpath, name) mod.get_type_alias(name) end - def load_core_rbs(raw_decls) - file_context = FileContext.new(nil) + def load_core_rbs(raw_decls, position_encoding) + file_context = FileContext.new(nil, position_encoding) lenv = LocalEnv.new(file_context, CRef::Toplevel, {}, []) decls = raw_decls.map do |raw_decl| AST.create_rbs_decl(raw_decl, lenv) end.compact - decls += AST.parse_rbs("typeprof-rbs-shim.rbs", <<-RBS) + decls += AST.parse_rbs("typeprof-rbs-shim.rbs", <<-RBS, position_encoding) class Exception include _Exception end @@ -308,16 +308,34 @@ class 
Object end class FileContext - attr_reader :path, :comments - def initialize(path, prism_source = nil, comments = nil) + attr_reader :path, :comments, :position_encoding + def initialize(path, position_encoding = nil, prism_source = nil, comments = nil) @path = path + @position_encoding = position_encoding || Encoding::UTF_16LE @prism_source = prism_source @code_units_cache = nil @comments = comments end + # Returns [start_column, end_column] for a Prism::Location, in the session-configured + # position encoding. For UTF-8 the LSP 3.17 spec counts bytes, so Prism's native byte + # columns are used directly (Prism's UTF-8 code_units_cache would count code points). + def column_offsets_for(prism_location) + if @position_encoding == Encoding::UTF_8 + [prism_location.start_column, prism_location.end_column] + else + cache = code_units_cache + [ + prism_location.cached_start_code_units_column(cache), + prism_location.cached_end_code_units_column(cache), + ] + end + end + + private + def code_units_cache - @code_units_cache ||= @prism_source&.code_units_cache(Encoding::UTF_16LE) + @code_units_cache ||= @prism_source&.code_units_cache(@position_encoding) end end @@ -339,7 +357,7 @@ def initialize(file_context, cref, locals, return_boxes, forward_args = nil) def path = @file_context&.path def code_range_from_node(node) - TypeProf::CodeRange.from_node(node, @file_context&.code_units_cache) + TypeProf::CodeRange.from_node(node, @file_context) end def new_var(name, node) diff --git a/lib/typeprof/core/service.rb b/lib/typeprof/core/service.rb index 3ee9d08b7..f0fa098e8 100644 --- a/lib/typeprof/core/service.rb +++ b/lib/typeprof/core/service.rb @@ -7,7 +7,7 @@ def initialize(options) @rbs_text_nodes = {} @genv = GlobalEnv.new - @genv.load_core_rbs(load_rbs_declarations(@options[:rbs_collection]).declarations) + @genv.load_core_rbs(load_rbs_declarations(@options[:rbs_collection]).declarations, @options[:position_encoding]) Builtin.new(genv).deploy end @@ -58,7 +58,7 @@ def 
update_rb_file(path, code) prev_node = @rb_text_nodes[path] code = File.read(path) unless code - node = AST.parse_rb(path, code) + node = AST.parse_rb(path, code, @options[:position_encoding]) return false unless node node.diff(@rb_text_nodes[path]) if prev_node @@ -119,7 +119,7 @@ def update_rbs_file(path, code) code = File.read(path) unless code begin - decls = AST.parse_rbs(path, code) + decls = AST.parse_rbs(path, code, @options[:position_encoding]) rescue RBS::ParsingError return false end diff --git a/test/core/position_encoding_test.rb b/test/core/position_encoding_test.rb new file mode 100644 index 000000000..6f237a56b --- /dev/null +++ b/test/core/position_encoding_test.rb @@ -0,0 +1,35 @@ +require_relative "../helper" + +module TypeProf::Core + class PositionEncodingTest < Test::Unit::TestCase + def first_node_code_range(service, path) + nodes = service.instance_variable_get(:@rb_text_nodes)[path] + nodes.body.stmts.first.code_range + end + + def test_default_is_utf16 + service = TypeProf::Core::Service.new({}) + # 𐐀 (U+10400) is a non-BMP char: 4 bytes UTF-8, 2 code units UTF-16LE + service.update_rb_file("t.rb", "𐐀x = 1\n") + cr = first_node_code_range(service, "t.rb") + # "𐐀x = 1" ends at UTF-16 code-unit column 7 (2+1+1+1+1+1) + assert_equal(7, cr.last.column) + end + + def test_utf8_gives_byte_columns + service = TypeProf::Core::Service.new(position_encoding: Encoding::UTF_8) + service.update_rb_file("t.rb", "𐐀x = 1\n") + cr = first_node_code_range(service, "t.rb") + # "𐐀x = 1" ends at UTF-8 byte column 9 (4+1+1+1+1+1) + assert_equal(9, cr.last.column) + end + + def test_utf32_gives_code_point_columns + service = TypeProf::Core::Service.new(position_encoding: Encoding::UTF_32LE) + service.update_rb_file("t.rb", "𐐀x = 1\n") + cr = first_node_code_range(service, "t.rb") + # "𐐀x = 1" ends at UTF-32 code-unit (= code point) column 6 + assert_equal(6, cr.last.column) + end + end +end From b6318b0eba9910f74342698551ce96ada57810c8 Mon Sep 17 00:00:00 2001 
From: ahogappa Date: Sat, 25 Apr 2026 22:14:43 +0900 Subject: [PATCH 2/2] Negotiate position encoding in LSP Initialize (LSP 3.17) Implements `general.positionEncodings` negotiation per LSP 3.17 spec. The server picks the first encoding from the client's preference list that it supports (`utf-8` / `utf-16` / `utf-32`) and reports it back via `capabilities.positionEncoding`. Falls back to UTF-16 (mandatory per spec) if the client doesn't propose any supported encoding. The negotiated value flows into per-workspace Services through `core_options.merge(position_encoding: ...)`, so each Service computes column positions in the agreed encoding. See: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocuments Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/typeprof/lsp/messages.rb | 5 +++++ lib/typeprof/lsp/server.rb | 34 +++++++++++++++++++++++++++++++-- test/lsp/lsp_test.rb | 37 ++++++++++++++++++++++++++++++++++-- 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/lib/typeprof/lsp/messages.rb b/lib/typeprof/lsp/messages.rb index c9a01dc43..d3656d087 100644 --- a/lib/typeprof/lsp/messages.rb +++ b/lib/typeprof/lsp/messages.rb @@ -56,6 +56,10 @@ def run class Message::Initialize < Message METHOD = "initialize" # request (required) def run + # Must negotiate encoding before add_workspaces so newly created Services honor it. 
+ client_encodings = @params.dig(:capabilities, :general, :positionEncodings) + @server.negotiate_position_encoding(client_encodings) + folders = @params[:workspaceFolders].map do |folder| folder => { uri:, } @server.uri_to_path(uri) @@ -65,6 +69,7 @@ def run respond( capabilities: { + positionEncoding: @server.lsp_position_encoding, textDocumentSync: { openClose: true, change: 2, # Incremental diff --git a/lib/typeprof/lsp/server.rb b/lib/typeprof/lsp/server.rb index 92e73ad3e..a26bf8023 100644 --- a/lib/typeprof/lsp/server.rb +++ b/lib/typeprof/lsp/server.rb @@ -47,6 +47,15 @@ def self.start_socket(core_options, port = 0) end end + # see: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocuments + LSP_POSITION_ENCODINGS = { + Encoding::UTF_8 => "utf-8", + Encoding::UTF_16LE => "utf-16", + Encoding::UTF_32LE => "utf-32", + }.freeze + + private_constant :LSP_POSITION_ENCODINGS + def initialize(core_options, reader, writer, url_schema: nil) @core_options = core_options @cores = {} @@ -62,9 +71,29 @@ def initialize(core_options, reader, writer, url_schema: nil) @diagnostic_severity = :error end - attr_reader :open_texts + attr_reader :open_texts, :position_encoding attr_accessor :signature_enabled + # Pick the first mutually-supported encoding from the client's preference-ordered list + # and store it. Falls back to UTF-16LE (mandatory per LSP 3.17 spec). 
+ def negotiate_position_encoding(client_encodings) + @position_encoding = pick_position_encoding(client_encodings) + end + + def lsp_position_encoding + LSP_POSITION_ENCODINGS.fetch(@position_encoding) + end + + def pick_position_encoding(client_encodings) + return Encoding::UTF_16LE unless client_encodings.is_a?(Array) + client_encodings.each do |enc| + encoding = LSP_POSITION_ENCODINGS.key(enc) + return encoding if encoding + end + Encoding::UTF_16LE + end + private :pick_position_encoding + #: (String) -> String def path_to_uri(path) @url_schema + File.expand_path(path).split("/").map {|s| CGI.escapeURIComponent(s) }.join("/") @@ -105,9 +134,10 @@ def add_workspaces(folders) end end @core_options[:exclude_patterns] = conf[:exclude] if conf[:exclude] + service_options = @core_options.merge(position_encoding: @position_encoding) conf[:analysis_unit_dirs].each do |dir| dir = File.expand_path(dir, path) - core = @cores[dir] = TypeProf::Core::Service.new(@core_options) + core = @cores[dir] = TypeProf::Core::Service.new(service_options) core.add_workspace(dir, @rbs_dir) end else diff --git a/test/lsp/lsp_test.rb b/test/lsp/lsp_test.rb index ad3fe2bf1..ba9e769ed 100644 --- a/test/lsp/lsp_test.rb +++ b/test/lsp/lsp_test.rb @@ -28,11 +28,14 @@ def setup @id = 0 end - def init(fixture) + def init(fixture, position_encodings: nil, expected_position_encoding: "utf-16") @folder = @lsp.path_to_uri(File.expand_path(File.join(__dir__, "..", "fixtures", fixture))) + "/" - id = request("initialize", workspaceFolders: [{ uri: @folder }]) + params = { workspaceFolders: [{ uri: @folder }] } + params[:capabilities] = { general: { positionEncodings: position_encodings } } if position_encodings + id = request("initialize", **params) expect_response(id) do |recv| assert_equal({ name: "typeprof", version: TypeProf::VERSION }, recv[:serverInfo]) + assert_equal(expected_position_encoding, recv[:capabilities][:positionEncoding]) end notify("initialized") end @@ -405,6 +408,36 @@ class Foo 
end end + def test_position_encoding_default + init("basic") + assert_equal(Encoding::UTF_16LE, @lsp.position_encoding) + end + + def test_position_encoding_utf8_preferred + init("basic", position_encodings: ["utf-8", "utf-16"], expected_position_encoding: "utf-8") + assert_equal(Encoding::UTF_8, @lsp.position_encoding) + end + + def test_position_encoding_empty_array + init("basic", position_encodings: [], expected_position_encoding: "utf-16") + assert_equal(Encoding::UTF_16LE, @lsp.position_encoding) + end + + def test_position_encoding_unsupported_only + init("basic", position_encodings: ["ascii"], expected_position_encoding: "utf-16") + assert_equal(Encoding::UTF_16LE, @lsp.position_encoding) + end + + def test_position_encoding_prefers_first_supported + init("basic", position_encodings: ["ascii", "utf-16", "utf-8"], expected_position_encoding: "utf-16") + assert_equal(Encoding::UTF_16LE, @lsp.position_encoding) + end + + def test_position_encoding_utf32_preferred + init("basic", position_encodings: ["utf-32", "utf-16"], expected_position_encoding: "utf-32") + assert_equal(Encoding::UTF_32LE, @lsp.position_encoding) + end + def test_type_definition_for_local_variable init("type_definition")