Module URI
In: lib/open-uri.rb
lib/uri/ldap.rb
lib/uri/ldaps.rb
lib/uri/generic.rb
lib/uri/http.rb
lib/uri/ftp.rb
lib/uri/common.rb
lib/uri/mailto.rb
lib/uri/https.rb
lib/uri.rb

URI support for Ruby

Author:Akira Yamada <akira@ruby-lang.org>
Documentation:Akira Yamada <akira@ruby-lang.org>, Dmitry V. Sabanin <sdmitry@lrn.ru>
License:Copyright (c) 2001 akira yamada <akira@ruby-lang.org> You can redistribute it and/or modify it under the same terms as Ruby.
Revision:$Id: uri.rb 16038 2008-04-15 09:41:47Z kazu $

See URI for documentation

Methods

extract   join   parse   regexp   split  

Included Modules

REGEXP

Classes and Modules

Module URI::Escape
Module URI::REGEXP
Class URI::BadURIError
Class URI::Error
Class URI::FTP
Class URI::Generic
Class URI::HTTP
Class URI::HTTPS
Class URI::InvalidComponentError
Class URI::InvalidURIError
Class URI::LDAP
Class URI::LDAPS
Class URI::MailTo

Public Class methods

Synopsis

  URI::extract(str[, schemes][,&blk])

Args

str:String to extract URIs from.
schemes:Limit URI matching to specific schemes.

Description

Extracts URIs from a string. If a block is given, yields each matched URI to the block and returns nil; otherwise returns an array of the matches.

Usage

  require "uri"

  URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
  # => ["http://foo.example.org/bla", "mailto:test@example.com"]

[Source]

     # File lib/uri/common.rb, line 551
551:   def self.extract(str, schemes = nil, &block)
552:     if block_given?
553:       str.scan(regexp(schemes)) { yield $& }
554:       nil
555:     else
556:       result = []
557:       str.scan(regexp(schemes)) { result.push $& }
558:       result
559:     end
560:   end

Synopsis

  URI::join(str[, str, ...])

Args

str:String(s) to work with

Description

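Joins URIs: parses the first string as a URI, then merges each following string into it in order and returns the resulting URI object.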

Usage

  require 'uri'

  p URI.join("http://localhost/","main.rbx")
  # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>

[Source]

     # File lib/uri/common.rb, line 519
519:   def self.join(*str)
520:     u = self.parse(str[0])
521:     str[1 .. -1].each do |x|
522:       u = u.merge(x)
523:     end
524:     u
525:   end

Synopsis

  URI::parse(uri_str)

Args

uri_str:String with URI.

Description

Creates an instance of one of the URI classes from the string, chosen by its scheme (URI::Generic is used when the scheme is absent or not registered).

Raises

URI::InvalidURIError

  Raised if the given URI is not valid.

Usage

  require 'uri'

  uri = URI.parse("http://www.ruby-lang.org/")
  p uri
  # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
  p uri.scheme
  # => "http"
  p uri.host
  # => "www.ruby-lang.org"

[Source]

     # File lib/uri/common.rb, line 483
483:   def self.parse(uri)
484:     scheme, userinfo, host, port, 
485:       registry, path, opaque, query, fragment = self.split(uri)
486: 
487:     if scheme && @@schemes.include?(scheme.upcase)
488:       @@schemes[scheme.upcase].new(scheme, userinfo, host, port, 
489:                                    registry, path, opaque, query, 
490:                                    fragment)
491:     else
492:       Generic.new(scheme, userinfo, host, port, 
493:                   registry, path, opaque, query, 
494:                   fragment)
495:     end
496:   end

Synopsis

  URI::regexp([match_schemes])

Args

match_schemes:Array of schemes. If given, the resulting regexp matches only URIs whose scheme is one of match_schemes.

Description

Returns a Regexp object which matches URI-like strings. The Regexp object returned by this method contains an arbitrary number of capture groups (parentheses). Never rely on their number.

Usage

  require 'uri'

  # extract first URI from html_string
  html_string.slice(URI.regexp)

  # remove ftp URIs
  html_string.sub(URI.regexp(['ftp']), '')

  # You should not rely on the number of parentheses
  html_string.scan(URI.regexp) do |*matches|
    p $&
  end

[Source]

     # File lib/uri/common.rb, line 593
593:   def self.regexp(schemes = nil)
594:     unless schemes
595:       ABS_URI_REF
596:     else
597:       /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
598:     end
599:   end

Synopsis

  URI::split(uri)

Args

uri:String with URI.

Description

Splits the string into the following parts and returns an array with the result:

  * Scheme
  * Userinfo
  * Host
  * Port
  * Registry
  * Path
  * Opaque
  * Query
  * Fragment

Usage

  require 'uri'

  p URI.split("http://www.ruby-lang.org/")
  # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]

[Source]

     # File lib/uri/common.rb, line 380
380:   def self.split(uri)
381:     case uri
382:     when ''
383:       # null uri
384: 
385:     when ABS_URI
386:       scheme, opaque, userinfo, host, port, 
387:         registry, path, query, fragment = $~[1..-1]
388: 
389:       # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
390: 
391:       # absoluteURI   = scheme ":" ( hier_part | opaque_part )
392:       # hier_part     = ( net_path | abs_path ) [ "?" query ]
393:       # opaque_part   = uric_no_slash *uric
394: 
395:       # abs_path      = "/"  path_segments
396:       # net_path      = "//" authority [ abs_path ]
397: 
398:       # authority     = server | reg_name
399:       # server        = [ [ userinfo "@" ] hostport ]
400: 
401:       if !scheme
402:         raise InvalidURIError, 
403:           "bad URI(absolute but no scheme): #{uri}"
404:       end
405:       if !opaque && (!path && (!host && !registry))
406:         raise InvalidURIError,
407:           "bad URI(absolute but no path): #{uri}" 
408:       end
409: 
410:     when REL_URI
411:       scheme = nil
412:       opaque = nil
413: 
414:       userinfo, host, port, registry, 
415:         rel_segment, abs_path, query, fragment = $~[1..-1]
416:       if rel_segment && abs_path
417:         path = rel_segment + abs_path
418:       elsif rel_segment
419:         path = rel_segment
420:       elsif abs_path
421:         path = abs_path
422:       end
423: 
424:       # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
425: 
426:       # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
427: 
428:       # net_path      = "//" authority [ abs_path ]
429:       # abs_path      = "/"  path_segments
430:       # rel_path      = rel_segment [ abs_path ]
431: 
432:       # authority     = server | reg_name
433:       # server        = [ [ userinfo "@" ] hostport ]
434: 
435:     else
436:       raise InvalidURIError, "bad URI(is not URI?): #{uri}"
437:     end
438: 
439:     path = '' if !path && !opaque # (see RFC2396 Section 5.2)
440:     ret = [
441:       scheme, 
442:       userinfo, host, port,         # X
443:       registry,                        # X
444:       path,                         # Y
445:       opaque,                        # Y
446:       query,
447:       fragment
448:     ]
449:     return ret
450:   end

[Validate]