Class | RDoc::C_Parser |
In: |
lib/rdoc/parsers/parse_c.rb
|
Parent: | Object |
We attempt to parse C extension files. Basically we look for the standard patterns that you find in extensions: rb_define_class, rb_define_method and so on. We also try to find the corresponding C source for the methods and extract comments, but if we fail we don't worry too much.
The comments associated with a Ruby method are extracted from the C comment block associated with the routine that implements that method, that is to say the method whose name is given in the rb_define_method call. For example, you might write:
/* * Returns a new array that is a one-dimensional flattening of this * array (recursively). That is, for every element that is an array, * extract its elements into the new array. * * s = [ 1, 2, 3 ] #=> [1, 2, 3] * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] */ static VALUE rb_ary_flatten(ary) VALUE ary; { ary = rb_obj_dup(ary); rb_ary_flatten_bang(ary); return ary; } ... void Init_Array() { ... rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
Here RDoc will determine from the rb_define_method line that there's a method called "flatten" in class Array, and will look for the implementation in the method rb_ary_flatten. It will then use the comment from that method in the HTML output. This method must be in the same source file as the rb_define_method.
C classes can be diagrammed (see /tc/dl/ruby/ruby/error.c), and RDoc integrates C and Ruby source into one tree.
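The comment blocks may include special directives:
Document-class: name | This comment block is documentation for the given class. Use this when the Init_xxx method is not named after the class.
Document-method: name | This comment documents the named method. Use this when RDoc cannot automatically find the method from its declaration.
call-seq: text up to an empty line | Because C source doesn't give descriptive names to Ruby-level parameters, you need to document the calling sequence explicitly.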
In addition, RDoc assumes by default that the C method implementing a Ruby function is in the same source file as the rb_define_method call. If this isn't the case, add the comment
rb_define_method(....); // in: filename
As an example, we might have an extension that defines multiple classes in its Init_xxx method. We could document them using
/* * Document-class: MyClass * * Encapsulate the writing and reading of the configuration * file. ... */ /* * Document-method: read_value * * call-seq: * cfg.read_value(key) -> value * cfg.read_value(key) { |key| } -> value * * Return the value corresponding to +key+ from the configuration. * In the second form, if the key isn't found, invoke the * block and return its value. */
progress | [RW] |
prepare to parse a C file
# File lib/rdoc/parsers/parse_c.rb, line 177
#
# Prepare to parse a C file.
#
# Stores the collaborators handed in by the caller, starts from a private
# copy of KNOWN_CLASSES, and preprocesses +body+ (ifdef removal first, then
# tab expansion) before keeping it in @body. Progress output goes to
# $stderr unless +options.quiet+ is set.
def initialize(top_level, file_name, body, options, stats)
  @top_level     = top_level
  @options       = options
  @stats         = stats
  @file_dir      = File.dirname(file_name)
  @classes       = {}
  @known_classes = KNOWN_CLASSES.dup

  # Order matters: the prototype ifdefs are stripped before tabs are expanded.
  without_ifdefs = handle_ifdefs_in(body)
  @body = handle_tab_width(without_ifdefs)

  @progress = $stderr unless options.quiet
end
Extract the classes/modules and methods from a C file and return the corresponding top-level object
# File lib/rdoc/parsers/parse_c.rb, line 190
#
# Extract the classes/modules and methods from a C file and return the
# corresponding top-level object.
#
# The passes run in a fixed order: commented-out definitions are removed
# first, then classes, constants, methods, includes and aliases are scanned.
def scan
  passes = %w[
    remove_commented_out_lines
    do_classes
    do_constants
    do_methods
    do_includes
    do_aliases
  ]
  passes.each { |pass| send(pass) }

  @top_level
end
# File lib/rdoc/parsers/parse_c.rb, line 455
#
# Scans @body for rb_define_alias(var, "new", "old") calls and records an
# Alias on the class found for +var+. Each alias is counted as a method in
# @stats. When +var+ is not in @known_classes, the variable name itself is
# used as the class name.
def do_aliases
  alias_call = %r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m

  @body.scan(alias_call) do |var_name, new_name, old_name|
    @stats.num_methods += 1

    class_name = @known_classes[var_name] || var_name
    owner = find_class(var_name, class_name)

    owner.add_alias(Alias.new("", old_name, new_name, ""))
  end
end
# File lib/rdoc/parsers/parse_c.rb, line 318
#
# Scans @body for class and module definitions and hands each one to
# handle_class_module. The recognised forms, in scan order, are:
#
#   var = rb_define_module("Name")
#   var = rb_define_class("Name", parent)
#   var = boot_defclass("Name", parent)            (parent "0" means none)
#   var = rb_define_module_under(outer, "Name")
#   var = rb_define_class_under(outer, "Name", parent)
def do_classes
  top_level_module = /(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx
  @body.scan(top_level_module) do |var_name, class_name|
    handle_class_module(var_name, "module", class_name, nil, nil)
  end

  # The '.' lets us handle SWIG-generated files
  top_level_class = /([\w\.]+)\s* = \s*rb_define_class\s*
            \(
               \s*"(\w+)",
               \s*(\w+)\s*
            \)/mx
  @body.scan(top_level_class) do |var_name, class_name, parent|
    handle_class_module(var_name, "class", class_name, parent, nil)
  end

  boot_class = /(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/
  @body.scan(boot_class) do |var_name, class_name, parent|
    parent = nil if parent == "0"
    handle_class_module(var_name, "class", class_name, parent, nil)
  end

  nested_module = /(\w+)\s* = \s*rb_define_module_under\s*
            \(
               \s*(\w+),
               \s*"(\w+)"
            \s*\)/mx
  @body.scan(nested_module) do |var_name, in_module, class_name|
    handle_class_module(var_name, "module", class_name, nil, in_module)
  end

  nested_class = /([\w\.]+)\s* = \s*rb_define_class_under\s*
            \(
               \s*(\w+),
               \s*"(\w+)",
               \s*(\w+)\s*
            \s*\)/mx
  @body.scan(nested_class) do |var_name, in_module, class_name, parent|
    handle_class_module(var_name, "class", class_name, parent, in_module)
  end
end
# File lib/rdoc/parsers/parse_c.rb, line 366
#
# Scans @body for rb_define_variable / rb_define_readonly_variable /
# rb_define_const / rb_define_global_const calls and passes each one to
# handle_constants. Definitions with no owner argument, or owned by
# rb_mKernel, are attributed to rb_cObject.
def do_constants
  constant_def = %r{\Wrb_define_
                 (
                    variable |
                    readonly_variable |
                    const |
                    global_const |
                  )
             \s*\(
               (?:\s*(\w+),)?
               \s*"(\w+)",
               \s*(.*?)\s*\)\s*;
               }xm

  @body.scan(constant_def) do |type, var_name, const_name, definition|
    if !var_name || var_name == "rb_mKernel"
      var_name = "rb_cObject"
    end
    handle_constants(type, var_name, const_name, definition)
  end
end
Look for includes of the form:
rb_include_module(rb_cArray, rb_mEnumerable);
# File lib/rdoc/parsers/parse_c.rb, line 721
#
# Look for includes of the form:
#
#   rb_include_module(rb_cArray, rb_mEnumerable);
#
# and add an Include to the receiving class. Calls whose first argument is
# not a class variable already recorded in @classes are ignored. The module
# name is looked up in @known_classes, falling back to the C variable name.
def do_includes
  @body.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |target_var, module_var|
    target = @classes[target_var]
    next unless target

    module_name = @known_classes[module_var] || module_var
    target.add_include(Include.new(module_name, ""))
  end
end
# File lib/rdoc/parsers/parse_c.rb, line 388
#
# Scans @body for method-like definitions and dispatches them:
#
# * rb_define_{singleton_method,method,module_function,private_method}
#   go to handle_method (rb_mKernel methods are attributed to rb_cObject);
# * rb_define_attr goes to handle_attr;
# * rb_define_global_function is recorded as a method on rb_mKernel;
# * define_filetest_function is recorded both as a method on rb_mFileTest
#   and as a singleton method on rb_cFile.
#
# An optional trailing "// in file.c" (or "/* in file.c") comment names the
# source file holding the implementation.
def do_methods
  method_def = %r{rb_define_
                 (
                    singleton_method |
                    method           |
                    module_function  |
                    private_method
                 )
                 \s*\(\s*([\w\.]+),
                   \s*"([^"]+)",
                   \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                   \s*(-?\w+)\s*\)
                 (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
               }xm

  # Ignore top-object and weird struct.c dynamic stuff
  # (it'd be nice to handle argf)
  skipped_receivers = %w[ruby_top_self nstr envtbl argf]

  @body.scan(method_def) do |type, var_name, meth_name, meth_body, param_count, source_file|
    next if skipped_receivers.include?(var_name)

    var_name = "rb_cObject" if var_name == "rb_mKernel"
    handle_method(type, var_name, meth_name,
                  meth_body, param_count, source_file)
  end

  attr_def = %r{rb_define_attr\(
                   \s*([\w\.]+),
                   \s*"([^"]+)",
                   \s*(\d+),
                   \s*(\d+)\s*\);
              }xm

  @body.scan(attr_def) do |var_name, attr_name, attr_reader, attr_writer|
    #var_name = "rb_cObject" if var_name == "rb_mKernel"
    readable = attr_reader.to_i != 0
    writable = attr_writer.to_i != 0
    handle_attr(var_name, attr_name, readable, writable)
  end

  global_function_def = %r{rb_define_global_function\s*\(
                   \s*"([^"]+)",
                   \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                   \s*(-?\w+)\s*\)
                 (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
               }xm

  @body.scan(global_function_def) do |meth_name, meth_body, param_count, source_file|
    handle_method("method", "rb_mKernel", meth_name,
                  meth_body, param_count, source_file)
  end

  filetest_def = /define_filetest_function\s*\(
                   \s*"([^"]+)",
                   \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                   \s*(-?\w+)\s*\)/xm

  @body.scan(filetest_def) do |meth_name, meth_body, param_count|
    handle_method("method", "rb_mFileTest", meth_name, meth_body, param_count)
    handle_method("singleton_method", "rb_cFile", meth_name, meth_body, param_count)
  end
end
# File lib/rdoc/parsers/parse_c.rb, line 566
#
# Finds the comment for the attribute +attr_name+ in @body.
#
# Looks first for a C comment placed directly above the matching
# rb_define_attr call, then for a "Document-attr: attr_name" section.
# Returns the raw comment text, or an empty string when neither is found.
#
# +attr_name+ is escaped before being interpolated, so regexp
# metacharacters in the name are matched literally (the same treatment
# find_override_comment gives method names).
def find_attr_comment(attr_name)
  name = Regexp.escape(attr_name)

  if @body =~ %r{((?>/\*.*?\*/\s+))
                 rb_define_attr\((?:\s*(\w+),)?\s*"#{name}"\s*,.*?\)\s*;}xmi
    $1
  elsif @body =~ %r{Document-attr:\s#{name}\s*?\n((?>.*?\*/))}m
    $1
  else
    ''
  end
end
Find the C code corresponding to a Ruby method
# File lib/rdoc/parsers/parse_c.rb, line 626
#
# Find the C code corresponding to a Ruby method.
#
# +meth_name+ is the C function name, +meth_obj+ the method object to fill
# in, and +body+ the C source to search. Three shapes are recognised:
#
# 1. a commented "VALUE meth_name(...)" definition;
# 2. a commented "#define meth_name other_name", which is followed to
#    +other_name+ and has its own comment prepended;
# 3. an uncommented "#define meth_name other_name", which is followed.
#
# When none match, a Document-method override comment is still accepted.
# Returns true on success; otherwise warns (unless +quiet+) and returns
# false.
def find_body(meth_name, meth_obj, body, quiet = false)
  case body
  when %r{((?>/\*.*?\*/\s*))(?:static\s+)?VALUE\s+#{meth_name}
          \s*(\(.*?\)).*?^}xm
    # Grab the match data before any helper can run another regexp.
    comment   = $1
    body_text = $&

    remove_private_comments(comment) if comment

    # see if we can find the whole body
    whole_body = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
    found = Regexp.new(whole_body, Regexp::MULTILINE).match(body)
    body_text = found[0] if found

    # The comment block may have been overridden with a
    # 'Document-method' block. This happens in the interpreter
    # when multiple methods are vectored through to the same
    # C method but those methods are logically distinct (for
    # example Kernel.hash and Kernel.object_id share the same
    # implementation)
    comment = find_override_comment(meth_obj.name) || comment

    find_modifiers(comment, meth_obj) if comment

    meth_obj.start_collecting_tokens
    meth_obj.add_token(RubyToken::Token.new(1,1).set_text(body_text))
    meth_obj.comment = mangle_comment(comment)
  when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
    comment = $1
    target  = $2
    find_body(target, meth_obj, body, true)
    find_modifiers(comment, meth_obj)
    meth_obj.comment = mangle_comment(comment) + meth_obj.comment
  when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
    target = $1
    unless find_body(target, meth_obj, body, true)
      warn "No definition for #{meth_name}" unless quiet
      return false
    end
  else
    # No body, but might still have an override comment
    comment = find_override_comment(meth_obj.name)

    unless comment
      warn "No definition for #{meth_name}" unless quiet
      return false
    end

    find_modifiers(comment, meth_obj)
    meth_obj.comment = mangle_comment(comment)
  end

  true
end
# File lib/rdoc/parsers/parse_c.rb, line 740
#
# Returns the class or module object recorded for the C variable
# +raw_name+, creating it under @top_level on first use. Variables whose
# name starts with "rb_m" are created as modules, everything else as a
# class with no parent.
def find_class(raw_name, name)
  @classes[raw_name] ||=
    if raw_name =~ /^rb_m/
      @top_level.add_module(NormalModule, name)
    else
      @top_level.add_class(NormalClass, name, nil)
    end
end
Look for class or module documentation above Init_+class_name+(void), in a Document-class class_name (or module) comment or above an rb_define_class (or module). If a comment is supplied above a matching Init_ and a rb_define_class the Init_ comment is used.
/* * This is a comment for Foo */ Init_Foo(void) { VALUE cFoo = rb_define_class("Foo", rb_cObject); } /* * Document-class: Foo * This is a comment for Foo */ Init_foo(void) { VALUE cFoo = rb_define_class("Foo", rb_cObject); } /* * This is a comment for Foo */ VALUE cFoo = rb_define_class("Foo", rb_cObject);
# File lib/rdoc/parsers/parse_c.rb, line 293
#
# Looks for the documentation of +class_name+ and, when found, stores the
# mangled comment on +class_meth+. Sources are tried in this order:
#
# 1. the comment directly above Init_+class_name+(void);
# 2. a "Document-class:" / "Document-module:" section naming the class;
# 3. the comment chunk immediately preceding the code that contains the
#    rb_define_class / rb_define_module call for the (unqualified) name.
#
# In case 3 the body is split into alternating code and comment chunks.
# When the definition sits in the very first chunk there is no preceding
# chunk, so no comment is assigned (previously index -1 wrapped around and
# the code chunk itself was used as the comment).
def find_class_comment(class_name, class_meth)
  comment = nil

  if @body =~ %r{((?>/\*.*?\*/\s+))
                 (static\s+)?void\s+Init_#{class_name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi
    comment = $1
  elsif @body =~ %r{Document-(class|module):\s#{class_name}\s*?\n((?>.*?\*/))}m
    comment = $2
  elsif @body =~ /rb_define_(class|module)/m
    class_name = class_name.split("::").last
    chunks = @body.split(/(\/\*.*?\*\/)\s*?\n/m)

    chunks.each_with_index do |chunk, index|
      next unless chunk =~ /rb_define_(class|module).*?"(#{class_name})"/m

      # Only a real predecessor can be the comment; index 0 has none.
      comment = chunks[index - 1] if index > 0
      break
    end
  end

  class_meth.comment = mangle_comment(comment) if comment
end
Finds a comment matching type and const_name, either directly above the constant's rb_define_ call or in the matching Document- section.
# File lib/rdoc/parsers/parse_c.rb, line 523
#
# Returns the raw comment for the constant +const_name+ defined through
# rb_define_+type+: either the C comment placed directly above the
# definition or, failing that, the text of a matching
# "Document-const:" / "Document-global:" / "Document-variable:" section.
# Returns an empty string when no comment is found.
def find_const_comment(type, const_name)
  above_definition = %r{((?>^\s*/\*.*?\*/\s+))
                 rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi
  document_section = %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m

  @body[above_definition, 1] || @body[document_section, 1] || ''
end
If the comment block contains a section that looks like:
call-seq: Array.new Array.new(10)
use it for the parameters.
# File lib/rdoc/parsers/parse_c.rb, line 694
#
# Strips the :nodoc: and call-seq: directives out of +comment+ (in place)
# and applies them to +meth_obj+:
#
# * :nodoc: clears meth_obj.document_self;
# * call-seq: text, up to the next empty comment line, becomes
#   meth_obj.call_seq with the leading comment asterisks removed.
def find_modifiers(comment, meth_obj)
  nodoc_removed =
    comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') ||
    comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
  meth_obj.document_self = false if nodoc_removed

  call_seq_removed =
    comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') ||
    comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
  return unless call_seq_removed

  # The capture belongs to whichever substitution succeeded last.
  sequence = Regexp.last_match(1)
  sequence.gsub!(/^\s*\*\s*/, '')
  meth_obj.call_seq = sequence
end
# File lib/rdoc/parsers/parse_c.rb, line 709
#
# Returns the text of the "Document-method: meth_name" section in @body
# (everything after the directive line up to and including the closing
# "*/"), or nil when there is no such section. The method name is matched
# literally.
def find_override_comment(meth_name)
  escaped_name = Regexp.escape(meth_name)
  @body[%r{Document-method:\s#{escaped_name}\s*?\n((?>.*?\*/))}m, 1]
end
# File lib/rdoc/parsers/parse_c.rb, line 536
#
# Records the attribute +attr_name+ on the class bound to the C variable
# +var_name+. +reader+ and +writer+ select the access flags ("R", "W" or
# "RW"). Nothing is recorded when +var_name+ is not a known class. The
# attribute comment comes from find_attr_comment and is mangled only when
# it is non-empty.
def handle_attr(var_name, attr_name, reader, writer)
  # Attributes are deliberately not counted in @stats.num_methods.
  access = "#{'R' if reader}#{'W' if writer}"

  class_name = @known_classes[var_name]
  return unless class_name

  owner = find_class(var_name, class_name)
  return unless owner

  comment = find_attr_comment(attr_name)
  comment = mangle_comment(comment) unless comment.empty?

  owner.add_attribute(Attr.new('', attr_name, access, comment))
end
# File lib/rdoc/parsers/parse_c.rb, line 230
#
# Registers a class or module found in the C source.
#
# +var_name+ is the C variable holding it, +class_mod+ is "class" or
# "module", +class_name+ its Ruby name, +parent+ the C variable of the
# superclass (may be nil) and +in_module+ the C variable of the enclosing
# namespace (nil for top level). An enclosing namespace that has not been
# seen yet but is listed in @known_classes is registered first. If the
# enclosure cannot be resolved a warning is issued and nothing is added.
def handle_class_module(var_name, class_mod, class_name, parent, in_module)
  progress(class_mod[0, 1])

  parent_name = @known_classes[parent] || parent

  enclosure = @top_level
  if in_module
    enclosure = @classes[in_module]

    if !enclosure && (known_name = @known_classes[in_module])
      # Register the built-in enclosure on demand, then look it up again.
      kind = /^rb_m/ =~ in_module ? "module" : "class"
      handle_class_module(in_module, kind, known_name, nil, nil)
      enclosure = @classes[in_module]
    end

    unless enclosure
      warn("Enclosing class/module '#{in_module}' for #{class_mod} #{class_name} not known")
      return
    end
  end

  if class_mod == "class"
    cm = enclosure.add_class(NormalClass, class_name, parent_name)
    @stats.num_classes += 1
  else
    cm = enclosure.add_module(NormalModule, class_name)
    @stats.num_modules += 1
  end
  cm.record_location(enclosure.toplevel)

  find_class_comment(cm.full_name, cm)

  @classes[var_name]       = cm
  @known_classes[var_name] = cm.full_name
end
Adds constant comments. By providing some_value: at the start of the comment you can override the C value of the constant to give a friendly definition.
/* 300: The perfect score in bowling */ rb_define_const(cFoo, "PERFECT", INT2FIX(300));
Will override +INT2FIX(300)+ with the value +300+ in the output RDoc. Values may include quotes and escaped colons (\:).
# File lib/rdoc/parsers/parse_c.rb, line 477
#
# Records the constant +const_name+ (defined through rb_define_+type+ on
# the C variable +var_name+ with the C expression +definition+) on its
# class. Nothing is recorded when +var_name+ is not a known class.
#
# For rb_define_const the comment may take the form
# "/* definition: comment */"; the part before the last ':' then replaces
# the C expression as the displayed value. A literal ':' or '\' inside
# that value is written as '\:' or '\\' and is unescaped here.
def handle_constants(type, var_name, const_name, definition)
  #@stats.num_constants += 1
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  unless class_obj
    warn("Enclosing class/module '#{var_name}' for constant #{const_name} not known")
    return
  end

  # Mangle exactly once: mangle_comment rewrites its argument in place, and
  # running it again over already-mangled text would blank a leading '*'
  # that is part of the comment body.
  comment = mangle_comment(find_const_comment(type, const_name))

  value = definition
  if type.downcase == 'const'
    elements = comment.split(':')

    unless elements.empty?
      override = elements[0..-2].join(':')
      # An empty override means "default to the literal C definition".
      value = override.gsub(/\\([:\\])/) { $1 } unless override.empty?

      # Leading whitespace taken off the value is kept on the comment so
      # its indentation is preserved.
      leading = value[/\A\s+/]
      value   = value.sub(/\A\s+/, '')
      comment = leading ? "#{leading}#{elements.last.lstrip}" : elements.last
    end
  end

  class_obj.add_constant(Constant.new(const_name, value, comment))
end
Removes ifdefs that would otherwise confuse us
# File lib/rdoc/parsers/parse_c.rb, line 766
#
# Removes ifdefs that would otherwise confuse us: every
# "#ifdef HAVE_PROTOTYPES ... #else ... #endif" region is replaced by the
# text of its #else branch. Returns a new string; +body+ is not modified.
def handle_ifdefs_in(body)
  prototype_ifdef = /^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m

  body.gsub(prototype_ifdef) { Regexp.last_match(1) }
end
# File lib/rdoc/parsers/parse_c.rb, line 579
#
# Records one method definition found in the C source.
#
# +type+ is the rb_define_* flavour, +var_name+ the C variable of the
# owning class, +meth_name+ the Ruby name, +meth_body+ the C function
# implementing it, +param_count+ the arity as written in the source and
# +source_file+ an optional file (relative to the parsed file's directory)
# holding the implementation. "initialize" is documented as the singleton
# method "new". The method is added only when its body or an override
# comment is found and it has not been marked :nodoc:.
def handle_method(type, var_name, meth_name,
                  meth_body, param_count, source_file = nil)
  progress(".")

  @stats.num_methods += 1
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  return unless class_obj

  if meth_name == "initialize"
    meth_name = "new"
    type = "singleton_method"
  end

  meth_obj = AnyMethod.new("", meth_name)
  meth_obj.singleton = %w{singleton_method module_function}.include?(type)

  # A non-numeric arity (e.g. a macro name) is treated like a negative one.
  p_count =
    begin
      Integer(param_count)
    rescue StandardError
      -1
    end

  meth_obj.params =
    if p_count < 0
      "(...)"
    elsif p_count == 0
      "()"
    else
      "(" + (1..p_count).map { |i| "p#{i}" }.join(", ") + ")"
    end

  body = @body
  if source_file
    file_name = File.join(@file_dir, source_file)
    # Each external source file is read once and cached.
    body = (@@known_bodies[source_file] ||= File.read(file_name))
  end

  if find_body(meth_body, meth_obj, body) && meth_obj.document_self
    class_obj.add_method(meth_obj)
  end
end
# File lib/rdoc/parsers/parse_c.rb, line 751
#
# Expands tabs in +body+ to spaces using the tab width from
# Options.instance. A +body+ without tabs is returned unchanged; otherwise
# a new string is built from the expanded lines joined with "\n".
#
# Tab runs are expanded one at a time, left to right, so that each run's
# width is computed from the column it occupies after the runs before it
# have been expanded. (Replacing all runs in a single gsub! pass measured
# every run against the original, unexpanded line and mis-sized every run
# after the first.)
def handle_tab_width(body)
  return body unless /\t/ =~ body

  tab_width = Options.instance.tab_width

  body.split(/\n/).map do |line|
    1 while line.sub!(/\t+/) {
      run    = Regexp.last_match(0).length
      column = Regexp.last_match.pre_match.length
      ' ' * (tab_width * run - column % tab_width)
    }
    line
  end.join("\n")
end
Remove the /*’s and leading asterisks from C comments
# File lib/rdoc/parsers/parse_c.rb, line 733
#
# Remove the /*'s and leading asterisks from C comments.
#
# The first comment opener, the first comment closer and each line's
# leading asterisk are overwritten with spaces of the same length, so the
# remaining text keeps its column positions. +comment+ is modified in
# place and returned.
def mangle_comment(comment)
  comment.sub!(%r{/\*+})  { |opener| " " * opener.length }
  comment.sub!(%r{\*+/})  { |closer| " " * closer.length }
  comment.gsub!(/^[ \t]*\*/m) { |margin| " " * margin.length }
  comment
end
# File lib/rdoc/parsers/parse_c.rb, line 204
#
# Writes the progress marker +char+ to the progress stream and flushes it
# immediately. Does nothing when the options ask for quiet operation.
def progress(char)
  return if @options.quiet

  @progress.print(char)
  @progress.flush
end
removes lines that are commented out that might otherwise get picked up when scanning for classes and methods
# File lib/rdoc/parsers/parse_c.rb, line 226
#
# Removes lines that are commented out that might otherwise get picked up
# when scanning for classes and methods: in every "//" comment, the text up
# to and including the last "rb_define_" on that line is cut away, so the
# scanners no longer see a definition there. @body is modified in place.
def remove_commented_out_lines
  commented_definition = %r{//.*rb_define_}

  @body.gsub!(commented_definition, '//')
end