diff --git a/config.yml b/config.yml index 5e29d6fa18..4e5b077a35 100644 --- a/config.yml +++ b/config.yml @@ -1269,17 +1269,17 @@ nodes: - name: opening_loc type: location comment: | - Represents the location of the opening `|`. + Represents the location of the opening `{` or `do`. [1, 2, 3].each { |i| puts x } - ^ + ^ - name: closing_loc type: location comment: | - Represents the location of the closing `|`. + Represents the location of the closing `}` or `end`. [1, 2, 3].each { |i| puts x } - ^ + ^ comment: | Represents a block of ruby code. diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 3570af136a..53bb8f0ec5 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -76,6 +76,15 @@ def slice(byte_offset, length) source.byteslice(byte_offset, length) or raise end + # Converts the line number to a byte offset corresponding to the start of that line + def line_to_byte_offset(line) + l = line - @start_line + if l < 0 || l >= offsets.size + raise ArgumentError, "line #{line} is out of range" + end + offsets[l] + end + # Binary search through the offsets to find the line number for the given # byte offset. def line(byte_offset) @@ -895,4 +904,51 @@ def initialize(locals, forwarding) def self.scope(locals: [], forwarding: []) Scope.new(locals, forwarding) end + + # Given a Method, UnboundMethod or Proc, use its #source_location to parse the file and return a Prism node representing it. + # The returned node will either be a DefNode, LambdaNode, CallNode or ForNode. + # Raises ArgumentError if it cannot be found for any reason. + # Only works on Ruby 4+ as it needs #source_location to contain column and end line information. + def self.node_for(callable) + unless callable.is_a?(Method) || callable.is_a?(UnboundMethod) || callable.is_a?(Proc) + raise ArgumentError, 'Prism.node_for requires a Method, UnboundMethod or Proc' + end + source_location = callable.source_location + raise ArgumentError, "#source_location is nil for #{callable}" if source_location.nil? + raise ArgumentError, '#source_location does not contain column and end_line, this method only works on Ruby 4+' if source_location.size != 5 + file, start_line, start_column, end_line, end_column = source_location #: [String, Integer, Integer, Integer, Integer] + + unless File.exist?(file) + raise ArgumentError, "#source_location[0] is #{file} but this file does not exist" + end + + parse_result = Prism.parse_file(file, version: "current") + unless parse_result.success? + raise ArgumentError, "#{file} has syntax errors: #{parse_result.errors_format}" + end + root = parse_result.value + # CRuby currently returns the source_location columns in bytes and not characters + start_offset = parse_result.source.line_to_byte_offset(start_line) + start_column + end_offset = parse_result.source.line_to_byte_offset(end_line) + end_column + + found = root.tunnel(start_line, start_column).reverse.find do |node| + case node + when DefNode + node.start_offset == start_offset && node.end_offset == end_offset + when LambdaNode + # Proc#source_location returns start_column 2 for `-> { ... }` (just after the `->`) + node.operator_loc.end_offset == start_offset && node.end_offset == end_offset + when CallNode + # Proc#source_location returns start_column 5 for `proc { ... }` (the `{`) + node.block.is_a?(BlockNode) && node.block.opening_loc.start_offset == start_offset && node.end_offset == end_offset + when ForNode + node.start_offset == start_offset && node.end_offset == end_offset + else + false + end + end #: DefNode | LambdaNode | CallNode | ForNode + + raise ArgumentError, "Could not find node for #{callable} in #{file} at (#{start_line},#{start_column})-(#{end_line},#{end_column})" unless found + found + end end diff --git a/sig/prism/parse_result.rbs b/sig/prism/parse_result.rbs index e88e5f0664..d878ca2edd 100644 --- a/sig/prism/parse_result.rbs +++ b/sig/prism/parse_result.rbs @@ -14,6 +14,7 @@ module Prism def encoding: () -> Encoding def lines: () -> Array[String] def slice: (Integer byte_offset, Integer length) -> String + def line_to_byte_offset: (Integer line) -> Integer def line: (Integer byte_offset) -> Integer def line_start: (Integer byte_offset) -> Integer def line_end: (Integer byte_offset) -> Integer diff --git a/templates/lib/prism/node.rb.erb b/templates/lib/prism/node.rb.erb index ceee2b0ffe..02b7136778 100644 --- a/templates/lib/prism/node.rb.erb +++ b/templates/lib/prism/node.rb.erb @@ -184,24 +184,13 @@ module Prism queue = [self] #: Array[Prism::node] result = [] #: Array[Prism::node] + search_offset = source.line_to_byte_offset(line) + column + while (node = queue.shift) result << node node.compact_child_nodes.each do |child_node| - child_location = child_node.location - - start_line = child_location.start_line - end_line = child_location.end_line - - if start_line == end_line - if line == start_line && column >= child_location.start_column && column < child_location.end_column - queue << child_node - break - end - elsif (line == start_line && column >= child_location.start_column) || (line == end_line && column < child_location.end_column) - queue << child_node - break - elsif line > start_line && line < end_line + if child_node.start_offset <= search_offset && search_offset < child_node.end_offset queue << child_node break end diff --git a/templates/sig/prism.rbs.erb b/templates/sig/prism.rbs.erb index 5c74cee8f8..c943ea4a06 100644 --- a/templates/sig/prism.rbs.erb +++ b/templates/sig/prism.rbs.erb @@ -31,7 +31,8 @@ module Prism ?main_script: bool, ?offset: Integer, ?scopes: Array[Array[Symbol]], - ?verbose: bool + ?verbose: bool, + ?version: String ) -> <%= return_type %> <%- end -%> @@ -68,7 +69,8 @@ module Prism ?main_script: bool, ?offset: Integer, ?scopes: Array[Array[Symbol]], - ?verbose: bool + ?verbose: bool, + ?version: String ) -> <%= return_type %> <%- end -%> @@ -86,8 +88,11 @@ module Prism ?main_script: bool, ?offset: Integer, ?scopes: Array[Array[Symbol]], - ?verbose: bool + ?verbose: bool, + ?version: String ) -> ParseResult def self.scope: (?locals: Array[Symbol], ?forwarding: Array[Symbol]) -> Scope + + def self.node_for: (Method | UnboundMethod | Proc callable) -> (DefNode | LambdaNode | CallNode | ForNode) end diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb index fefe9def91..e733ed99ea 100644 --- a/test/prism/newline_test.rb +++ b/test/prism/newline_test.rb @@ -17,6 +17,8 @@ class NewlineTest < TestCase result/breadth_first_search_test.rb result/static_literals_test.rb result/warnings_test.rb + ruby/inline_method.rb + ruby/node_for_test.rb ruby/parser_test.rb ruby/ruby_parser_test.rb ] diff --git a/test/prism/ruby/inline_method.rb b/test/prism/ruby/inline_method.rb new file mode 100644 index 0000000000..f7fe8d319e --- /dev/null +++ b/test/prism/ruby/inline_method.rb @@ -0,0 +1,7 @@ +# A separate file because 2.7 can't parse this +module Prism + class NodeForTest < TestCase + def inline_method = 42 + INLINE_LOCATION_AND_FILE = [[__LINE__-1, 4, __LINE__-1, 26], __FILE__] + end +end diff --git a/test/prism/ruby/node_for_test.rb b/test/prism/ruby/node_for_test.rb new file mode 100644 index 0000000000..141fb020fb --- /dev/null +++ b/test/prism/ruby/node_for_test.rb @@ -0,0 +1,136 @@ +# frozen_string_literal: true +# typed: ignore + +require_relative "../test_helper" + +# Needs Prism.parse_file(file, version: "current") +return if RUBY_VERSION < "3.3" + +require_relative 'inline_method' + +module Prism + class NodeForTest < TestCase + INDENT = ' ' * 4 + + def m(foo) + 42 + end + M_LOCATION = [__LINE__-3, 4, __LINE__-1, 7] + + def été; 42; end + UTF8_LOCATION = [__LINE__-1, 4, __LINE__-1, 22] + + define_method(:define_method_method) { 42 } + DEFINE_METHOD_LOCATION = [__LINE__-1, 41, __LINE__-1, 47] + + def return_block(&block) + block + end + + iter = Object.new + def iter.each(&block) + block.call(block) + end + + for pr in iter + 42 + end + FOR_BODY_PROC = pr + FOR_BODY_PROC_LOCATION = [__LINE__-4, 4, __LINE__-2, 7] + + def with_location(callable, locs, file = __FILE__) + source_location = [file, *locs] + if RUBY_VERSION >= "4.0" + assert_equal callable.source_location, source_location + else + callable.define_singleton_method(:source_location) { source_location } + end + callable + end + + def test_def_method + node = Prism.node_for(with_location(NodeForTest.instance_method(:m), M_LOCATION)) + assert_instance_of(Prism::DefNode, node) + assert_equal "def m(foo)\n#{INDENT} 42\n#{INDENT}end", node.slice + + node = Prism.node_for(with_location(method(:m), M_LOCATION)) + assert_instance_of(Prism::DefNode, node) + assert_equal "def m(foo)\n 42\n end", node.slice + end + + def test_def_method_utf8 + node = Prism.node_for(with_location(method(:été), UTF8_LOCATION)) + assert_instance_of(Prism::DefNode, node) + assert_equal "def été; 42; end", node.slice + end + + def test_inline_method + node = Prism.node_for(with_location(method(:inline_method), *INLINE_LOCATION_AND_FILE)) + assert_instance_of(Prism::DefNode, node) + assert_equal "def inline_method = 42", node.slice + end + + def test_define_method + node = Prism.node_for(with_location(method(:define_method_method), DEFINE_METHOD_LOCATION)) + assert_instance_of(Prism::CallNode, node) + assert_equal "define_method(:define_method_method) { 42 }", node.slice + assert_equal "{ 42 }", node.block.slice + end + + def test_lambda + node = Prism.node_for(with_location(-> { 42 }, [__LINE__, 44, __LINE__, 51])) + assert_instance_of(Prism::LambdaNode, node) + assert_equal "-> { 42 }", node.slice + assert_equal "{ 42 }", node.opening_loc.join(node.closing_loc).slice + + node = Prism.node_for(with_location(lambda { 42 }, [__LINE__, 49, __LINE__, 55])) + assert_instance_of(Prism::CallNode, node) + assert_equal "lambda { 42 }", node.slice + assert_equal "{ 42 }", node.block.slice + end + + def test_proc + node = Prism.node_for(with_location(proc { 42 }, [__LINE__, 47, __LINE__, 53])) + assert_instance_of(Prism::CallNode, node) + assert_equal "proc { 42 }", node.slice + assert_equal "{ 42 }", node.block.slice + + node = Prism.node_for(with_location(return_block { 42 }, [__LINE__, 55, __LINE__, 61])) + assert_instance_of(Prism::CallNode, node) + assert_equal "return_block { 42 }", node.slice + assert_equal "{ 42 }", node.block.slice + + heredoc_proc = proc { <<~END } + heredoc + END + node = Prism.node_for(with_location(heredoc_proc, [__LINE__-3, 26, __LINE__-3, 36])) + assert_instance_of(Prism::CallNode, node) + assert_equal "proc { <<~END }", node.slice + assert_equal "heredoc\n", node.block.body.body.first.unescaped + end + + def test_method_to_proc + node = Prism.node_for(with_location(method(:inline_method).to_proc, *INLINE_LOCATION_AND_FILE)) + assert_instance_of(Prism::DefNode, node) + assert_equal "def inline_method = 42", node.slice + end + + def test_for + node = Prism.node_for(with_location(FOR_BODY_PROC, FOR_BODY_PROC_LOCATION)) + assert_instance_of(Prism::ForNode, node) + assert_equal "for pr in iter\n#{INDENT} 42\n#{INDENT}end", node.slice + assert_equal "42", node.statements.slice + end + + def test_eval + l = with_location(eval("-> { 42 }"), [1, 2, 1, 9], "(eval at #{__FILE__}:#{__LINE__})") + e = assert_raise(ArgumentError) { Prism.node_for(l) } + assert_include e.message, 'eval' + + l = eval "-> { 42 }", nil, __FILE__, __LINE__ + l = with_location(l, [__LINE__-1, 2, __LINE__-1, 9]) + e = assert_raise(ArgumentError) { Prism.node_for(l) } + assert_include e.message, 'Could not find node' + end + end +end diff --git a/test/prism/ruby/source_test.rb b/test/prism/ruby/source_test.rb new file mode 100644 index 0000000000..afd2825765 --- /dev/null +++ b/test/prism/ruby/source_test.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class SourceTest < TestCase + def test_line_to_byte_offset + parse_result = Prism.parse(<<~SRC) + abcd + efgh + ijkl + SRC + source = parse_result.source + + assert_equal 0, source.line_to_byte_offset(1) + assert_equal 5, source.line_to_byte_offset(2) + assert_equal 10, source.line_to_byte_offset(3) + assert_equal 15, source.line_to_byte_offset(4) + e = assert_raise(ArgumentError) { source.line_to_byte_offset(5) } + assert_equal "line 5 is out of range", e.message + e = assert_raise(ArgumentError) { source.line_to_byte_offset(0) } + assert_equal "line 0 is out of range", e.message + e = assert_raise(ArgumentError) { source.line_to_byte_offset(-1) } + assert_equal "line -1 is out of range", e.message + end + + def test_line_to_byte_offset_with_start_line + parse_result = Prism.parse(<<~SRC, line: 11) + abcd + efgh + ijkl + SRC + source = parse_result.source + + assert_equal 0, source.line_to_byte_offset(11) + assert_equal 5, source.line_to_byte_offset(12) + assert_equal 10, source.line_to_byte_offset(13) + assert_equal 15, source.line_to_byte_offset(14) + e = assert_raise(ArgumentError) { source.line_to_byte_offset(15) } + assert_equal "line 15 is out of range", e.message + e = assert_raise(ArgumentError) { source.line_to_byte_offset(10) } + assert_equal "line 10 is out of range", e.message + e = assert_raise(ArgumentError) { source.line_to_byte_offset(9) } + assert_equal "line 9 is out of range", e.message + end + end +end