Module | RSS::ListenerMixin |
In: |
lib/rss/0.9.rb
lib/rss/1.0.rb lib/rss/parser.rb lib/rss/atom.rb |
CONTENT_PATTERN | = | /\s*([^=]+)=(["'])([^\2]+?)\2/ |
NAMESPLIT | = | /^(?:([\w:][-\w\d.]*):)?([\w:][-\w\d.]*)/ |
do_validate | [RW] | |
ignore_unknown_element | [RW] | |
rss | [R] |
# File lib/rss/parser.rb, line 269
#
# Puts the listener into its pre-parse state: no feed object yet, validation
# enabled, unknown elements ignored, and every bookkeeping stack seeded with
# its bottom entry.
def initialize
  # Result object and parsing policy.
  @rss = nil
  @do_validate = true
  @ignore_unknown_element = true

  # Values recorded by xmldecl and instruction.
  @version = nil
  @encoding = nil
  @standalone = nil
  @xml_stylesheets = []

  # Stacks pushed in tag_start and popped in tag_end. The "xml" prefix is
  # pre-bound in the bottom namespace scope.
  @ns_stack = [{ "xml" => :xml }]
  @tag_stack = [[]]
  @text_stack = ['']
  @proc_stack = []
  @last_element = nil

  # State used while @xml_child_mode is on (see tag_start and text).
  @xml_child_mode = false
  @xml_element = nil
  @last_xml_element = nil
end
# File lib/rss/parser.rb, line 290
#
# Handles a processing instruction. Only "xml-stylesheet" is acted on: its
# content is parsed into name/value pairs and, when an "href" pair is present,
# an XMLStyleSheet built from those pairs is appended to @xml_stylesheets.
# Every other instruction is ignored.
def instruction(name, content)
  return unless name == "xml-stylesheet"

  pairs = parse_pi_content(content)
  @xml_stylesheets << XMLStyleSheet.new(pairs) if pairs.has_key?("href")
end
# File lib/rss/parser.rb, line 352
#
# Handles an end tag by unwinding the per-element state pushed in tag_start:
# the text buffer, the list of child tags, the completion callback and the
# namespace scope. The callback, when one was registered, receives the
# element's text and child tags.
def tag_end(name)
  p("end tag #{name}", @tag_stack) if DEBUG

  closed_text = @text_stack.pop
  closed_children = @tag_stack.pop
  on_close = @proc_stack.pop
  # A nil entry means nothing has to happen when this element closes.
  on_close.call(closed_text, closed_children) unless on_close.nil?
  @ns_stack.pop
end
# File lib/rss/parser.rb, line 299
#
# Handles a start tag.
#
# Pushes a fresh text buffer, a namespace scope (the enclosing scope plus any
# xmlns declarations on this tag) and a child-tag list. Then either builds an
# XML::Element (while @xml_child_mode is on) or dispatches to the matching
# initial_start_*/start_* handler, falling back to start_else_element.
#
# name       - tag name, possibly prefixed.
# attributes - Hash of attribute name => value, xmlns declarations included.
def tag_start(name, attributes)
  @text_stack.push('')

  # Separate namespace declarations from ordinary attributes.
  scope = @ns_stack.last.dup
  plain_attrs = {}
  attributes.each do |attr_name, attr_value|
    if /\Axmlns(?:\z|:)/ =~ attr_name
      # Whatever follows "xmlns" or "xmlns:" is the declared prefix
      # (the empty string for the default namespace).
      scope[Regexp.last_match.post_match] = attr_value
    else
      plain_attrs[attr_name] = attr_value
    end
  end
  @ns_stack.push(scope)

  prefix, local = split_name(name)
  uri = _ns(scope, prefix)
  @tag_stack.last.push([uri, local])
  @tag_stack.push([])

  if @xml_child_mode
    parent_xml = @last_xml_element
    element_attrs = attributes.dup
    unless parent_xml
      # Outermost XML element: copy the in-scope namespace bindings onto it
      # as xmlns attributes, without overriding ones it already carries.
      scope.each do |ns_prefix, ns_uri|
        next if ns_prefix == "xml"
        xmlns_key = ns_prefix.empty? ? "xmlns" : "xmlns:#{ns_prefix}"
        element_attrs[xmlns_key] ||= ns_uri
      end
    end
    xml_element = XML::Element.new(local,
                                   prefix.empty? ? nil : prefix,
                                   uri,
                                   element_attrs)
    parent_xml << xml_element if parent_xml
    @last_xml_element = xml_element

    # On close: step back to the parent element, or publish the finished
    # outermost element through @xml_element.
    on_close = Proc.new do |text, tags|
      if parent_xml
        @last_xml_element = parent_xml
      else
        @xml_element = @last_xml_element
        @last_xml_element = nil
      end
    end
    @proc_stack.push(on_close)
  elsif @rss.nil? and respond_to?("initial_start_#{local}", true)
    __send__("initial_start_#{local}", local, prefix, plain_attrs, scope.dup)
  elsif respond_to?("start_#{local}", true)
    __send__("start_#{local}", local, prefix, plain_attrs, scope.dup)
  else
    start_else_element(local, prefix, plain_attrs, scope.dup)
  end
end
# File lib/rss/parser.rb, line 364
#
# Handles character data. Outside XML child mode the data is appended to the
# text buffer of the innermost open element. In XML child mode it is appended
# to the XML element under construction, or dropped when there is none.
def text(data)
  return @text_stack.last << data unless @xml_child_mode

  @last_xml_element << data if @last_xml_element
end
Sets the instance variables @version, @encoding and @standalone from the XML declaration.
# File lib/rss/parser.rb, line 286
#
# Records the values of the XML declaration in @version, @encoding and
# @standalone. Returns the three values as an Array.
def xmldecl(version, encoding, standalone)
  @version = version
  @encoding = encoding
  @standalone = standalone
  [version, encoding, standalone]
end
# File lib/rss/parser.rb, line 416
#
# Checks that +prefix+ resolves to +require_uri+ within +ns+.
#
# On a mismatch, raises NSError when validating. Otherwise the prefix is
# forcibly bound to +require_uri+; note that this binding is written to the
# scope on top of @ns_stack, not to the +ns+ argument.
def check_ns(tag_name, prefix, ns, require_uri)
  return if _ns(ns, prefix) == require_uri
  raise NSError.new(tag_name, prefix, require_uri) if @do_validate

  # Force bind required URI with prefix
  @ns_stack.last[prefix] = require_uri
end
# File lib/rss/parser.rb, line 454
#
# Collects the attribute values that +klass+ declares from the raw +attrs+ of
# a tag.
#
# tag_name - name of the tag, used in error reporting.
# prefix   - prefix of the tag (not consulted here).
# attrs    - Hash of raw attribute name => value.
# ns       - Hash of namespace prefix => URI in scope for the tag.
# klass    - element class; klass.get_attributes yields
#            [name, uri-or-uris, required, element_name] entries.
#
# Returns a Hash of attribute name => value (nil when the attribute is absent).
# Raises MissingAttributeError when validating and a required attribute is
# absent.
def collect_attributes(tag_name, prefix, attrs, ns, klass)
  attributes = {}
  klass.get_attributes.each do |a_name, a_uri, required, element_name|
    # Normalize a single URI to a one-element list.
    a_uri = [a_uri] if a_uri.is_a?(String) || !a_uri.respond_to?(:include?)

    # Look for a qualified attribute under every prefix bound to an accepted
    # URI; the first one carrying a value wins.
    val = nil
    unless a_uri == [""]
      ns.each do |ns_prefix, uri|
        next unless a_uri.include?(uri)
        val = attrs["#{ns_prefix}:#{a_name}"]
        break if val
      end
    end
    # The unqualified form is accepted only when "" is among the URIs.
    val = attrs[a_name] if val.nil? && a_uri.include?("")

    if @do_validate && required && val.nil?
      unless a_uri.include?("")
        # Report the qualified name using the first matching binding only.
        # Prefixing once per matching binding would yield names such as
        # "b:a:name" when two prefixes share a URI.
        bound_prefix, = ns.find { |_, uri| a_uri.include?(uri) }
        a_name = "#{bound_prefix}:#{a_name}" if bound_prefix
      end
      raise MissingAttributeError.new(tag_name, a_name)
    end

    attributes[a_name] = val
  end
  attributes
end
# File lib/rss/1.0.rb, line 438
#
# Root handler for an RDF element seen before any feed object exists.
# Checks the namespace against RDF::URI, creates the RDF object from the
# recorded XML declaration, makes it the current element, and registers a
# callback that validates it (when validation is on) once the element closes.
def initial_start_RDF(tag_name, prefix, attrs, ns)
  check_ns(tag_name, prefix, ns, RDF::URI)

  @rss = RDF.new(@version, @encoding, @standalone)
  @rss.do_validate = @do_validate
  @rss.xml_stylesheets = @xml_stylesheets
  @last_element = @rss

  validate_on_close = proc do |_text, tags|
    @rss.validate_for_stream(tags, @ignore_unknown_element) if @do_validate
  end
  @proc_stack.push(validate_on_close)
end
# File lib/rss/atom.rb, line 734
#
# Root handler for an Atom entry element seen before any feed object exists.
# Checks the namespace against Atom::URI, creates the Atom::Entry from the
# recorded XML declaration, copies xml:lang and xml:base from the tag's
# attributes, makes the entry the current element, and registers a callback
# that validates it (when validation is on) once the element closes.
def initial_start_entry(tag_name, prefix, attrs, ns)
  check_ns(tag_name, prefix, ns, Atom::URI)

  @rss = Atom::Entry.new(@version, @encoding, @standalone)
  @rss.do_validate = @do_validate
  @rss.xml_stylesheets = @xml_stylesheets
  @rss.lang = attrs["xml:lang"]
  @rss.base = attrs["xml:base"]
  @last_element = @rss

  validate_on_close = proc do |_text, tags|
    @rss.validate_for_stream(tags) if @do_validate
  end
  @proc_stack.push(validate_on_close)
end
# File lib/rss/atom.rb, line 719
#
# Root handler for an Atom feed element seen before any feed object exists.
# Checks the namespace against Atom::URI, creates the Atom::Feed from the
# recorded XML declaration, copies xml:lang and xml:base from the tag's
# attributes, makes the feed the current element, and registers a callback
# that validates it (when validation is on) once the element closes.
def initial_start_feed(tag_name, prefix, attrs, ns)
  check_ns(tag_name, prefix, ns, Atom::URI)

  @rss = Atom::Feed.new(@version, @encoding, @standalone)
  @rss.do_validate = @do_validate
  @rss.xml_stylesheets = @xml_stylesheets
  @rss.lang = attrs["xml:lang"]
  @rss.base = attrs["xml:base"]
  @last_element = @rss

  validate_on_close = proc do |_text, tags|
    @rss.validate_for_stream(tags) if @do_validate
  end
  @proc_stack.push(validate_on_close)
end
# File lib/rss/0.9.rb, line 413
#
# Root handler for an rss element seen before any feed object exists.
# Requires the element to be in no namespace (""), creates the Rss object
# from the tag's version attribute and the recorded XML declaration, makes it
# the current element, and registers a callback that validates it (when
# validation is on) once the element closes.
def initial_start_rss(tag_name, prefix, attrs, ns)
  check_ns(tag_name, prefix, ns, "")

  @rss = Rss.new(attrs['version'], @version, @encoding, @standalone)
  @rss.do_validate = @do_validate
  @rss.xml_stylesheets = @xml_stylesheets
  @last_element = @rss

  validate_on_close = proc do |_text, tags|
    @rss.validate_for_stream(tags, @ignore_unknown_element) if @do_validate
  end
  @proc_stack.push(validate_on_close)
end
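Extracts every name="value" pair from content and returns them as a Hash of name => value. According to the constant CONTENT_PATTERN, a value may be delimited by either single or double quotes. If a name occurs more than once, the last value wins; the Hash is empty when nothing matches.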
# File lib/rss/parser.rb, line 381
#
# Scans +content+ for every match of CONTENT_PATTERN and returns a Hash
# mapping each captured name to its captured value. A later occurrence of a
# name replaces an earlier one.
def parse_pi_content(content)
  content.scan(CONTENT_PATTERN).each_with_object({}) do |(name, _quote, value), pairs|
    pairs[name] = value
  end
end
# File lib/rss/parser.rb, line 488
#
# Creates the +klass+ instance for a newly opened element, attaches it to the
# current element and makes it the current element. XML child mode is turned
# on when the new element reports XML content.
#
# Returns the Proc to run when the element closes. That Proc stores the
# element's content (the collected XML element or the text, Base64-decoded
# when the element asks for it), validates the element when validation is on,
# and restores the previous current element.
def setup_next_element(tag_name, klass, attributes)
  parent_element = @last_element
  child = klass.new(@do_validate, attributes)
  parent_element.set_next_element(tag_name, child)
  @last_element = child
  child.parent = parent_element if klass.need_parent?
  @xml_child_mode = child.have_xml_content?

  Proc.new do |text, tags|
    p(@last_element.class) if DEBUG

    if @xml_child_mode
      # Hand over the XML subtree built by tag_start, then leave XML mode.
      @last_element.content = @xml_element.to_s
      @last_element.__send__(@last_element.class.xml_setter, @xml_element)
      @xml_element = nil
      @xml_child_mode = false
    elsif klass.have_content?
      text = Base64.decode64(text.lstrip) if @last_element.need_base64_encode?
      @last_element.content = text
    end

    @last_element.validate_for_stream(tags, @ignore_unknown_element) if @do_validate
    @last_element = parent_element
  end
end
# File lib/rss/parser.rb, line 411
#
# Splits a tag name into [prefix, local name] using NAMESPLIT. The prefix is
# the empty string when the pattern captures none.
def split_name(name)
  matched = NAMESPLIT.match(name)
  prefix = matched && matched[1]
  local = matched && matched[2]
  [prefix || '', local]
end
# File lib/rss/parser.rb, line 389
#
# Fallback for a start tag that has no dedicated start_* handler.
#
# When the class of the current element defines a constant named after the
# tag (as mapped by self.class.class_name), that class handles the element.
# Otherwise the tag is unknown: it is skipped by registering a nil callback
# when validation is off or unknown elements are ignored, and reported with
# NotExpectedTagError in every other case.
def start_else_element(local, prefix, attrs, ns)
  class_name = self.class.class_name(_ns(ns, prefix), local)
  current_class = @last_element.class
  known = class_name &&
          (current_class.const_defined?(class_name) ||
           current_class.constants.include?(class_name))

  if known
    next_class = current_class.const_get(class_name)
    start_have_something_element(local, prefix, attrs, ns, next_class)
  elsif !@do_validate || @ignore_unknown_element
    @proc_stack.push(nil)
  else
    parent = current_class.tag_name || "ROOT ELEMENT???"
    raise NotExpectedTagError.new(local, _ns(ns, prefix), parent)
  end
end
# File lib/rss/parser.rb, line 427
#
# Registers the close callback for an element whose text is stored on the
# current element through a setter.
#
# When the element closes, the callback looks up the setter for
# (required_uri, tag_name). If the current element responds to it, the text
# is stored; with validation on, an already-set value raises TooMuchTagError
# first. If it does not respond, NotExpectedTagError is raised when
# validating and unknown elements are not ignored; otherwise nothing happens.
def start_get_text_element(tag_name, prefix, ns, required_uri)
  on_close = proc do |text, tags|
    setter = self.class.setter(required_uri, tag_name)
    if @last_element.respond_to?(setter)
      if @do_validate &&
         @last_element.__send__(self.class.getter(required_uri, tag_name))
        raise TooMuchTagError.new(tag_name, @last_element.tag_name)
      end
      @last_element.__send__(setter, text.to_s)
    elsif @do_validate && !@ignore_unknown_element
      raise NotExpectedTagError.new(tag_name, _ns(ns, prefix),
                                    @last_element.tag_name)
    end
  end
  @proc_stack.push(on_close)
end
# File lib/rss/parser.rb, line 448
#
# Starts an element that is represented by its own class: checks the tag's
# namespace against klass.required_uri, collects the attributes the class
# declares, creates the element, and registers its close callback.
def start_have_something_element(tag_name, prefix, attrs, ns, klass)
  check_ns(tag_name, prefix, ns, klass.required_uri)
  on_close = setup_next_element(
    tag_name,
    klass,
    collect_attributes(tag_name, prefix, attrs, ns, klass)
  )
  @proc_stack.push(on_close)
end