uri/common.rb

Author

Akira Yamada <akira@ruby-lang.org>

Revision

$Id$

License

You can redistribute it and/or modify it under the same terms as Ruby.

URI support for Ruby

Author

Akira Yamada <akira@ruby-lang.org>

Documentation

Akira Yamada <akira@ruby-lang.org>, Dmitry V. Sabanin <sdmitry@lrn.ru>

License

Copyright © 2001 akira yamada <akira@ruby-lang.org>. You can redistribute it and/or modify it under the same terms as Ruby.

Revision

$Id$

See URI for documentation

Namespace
Methods
E
J
P
R
S
Included Modules
Class Public methods
extract(str, schemes = nil, &block)

Synopsis

URI::extract(str[, schemes][,&blk])

Args

str

String to extract URIs from.

schemes

Limit URI matching to the given schemes.

Description

Extracts URIs from a string. If a block is given, iterates through all matched URIs and returns nil; otherwise returns an array with the matches.

Usage

require "uri"
URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
# => ["http://foo.example.org/bla", "mailto:test@example.com"]
# File lib/uri/common.rb, line 551
# Scans +str+ for every substring matching <tt>regexp(schemes)</tt>.
#
# str::     String to search.
# schemes:: Passed straight through to +regexp+ to build the pattern.
#
# With a block, yields each matched substring in order and returns +nil+.
# Without a block, returns an Array of the matched substrings (empty when
# nothing matches).
def self.extract(str, schemes = nil, &block)
  pattern = regexp(schemes)

  if block_given?
    # Use the whole match, not scan's block argument: when the pattern has
    # capture groups, scan passes the groups instead of the full match.
    str.scan(pattern) { yield Regexp.last_match(0) }
    return nil
  end

  matches = []
  str.scan(pattern) { matches << Regexp.last_match(0) }
  matches
end
join(*str)

Synopsis

URI::join(str[, str, ...])

Args

str

String(s) to work with

Description

Joins URIs.

Usage

require 'uri'
p URI.join("http://localhost/","main.rbx")
# => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>
# File lib/uri/common.rb, line 519
# Parses the first argument with +parse+, then merges each remaining
# argument into the running result, left to right, via +merge+.
#
# str:: One or more values; the first is handed to +parse+, the rest to
#       +merge+ on the object built so far.
#
# Returns the object produced by the last merge (or by +parse+ when only one
# argument is given).
def self.join(*str)
  base = self.parse(str[0])
  str[1..-1].inject(base) { |joined, ref| joined.merge(ref) }
end
parse(uri)

Synopsis

URI::parse(uri_str)

Args

uri_str

String with URI.

Description

Creates an instance of one of URI's subclasses from the string.

Raises

URI::InvalidURIError

Raised if the given URI is not a valid one.

Usage

require 'uri'
uri = URI.parse("http://www.ruby-lang.org/")
p uri
# => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
p uri.scheme
# => "http" 
p uri.host
# => "www.ruby-lang.org"
# File lib/uri/common.rb, line 483
# Builds a URI object from +uri+.
#
# The string is broken into components by +split+. When the scheme is present
# and its upcased form is a key of <tt>@@schemes</tt>, the class stored under
# that key is instantiated; otherwise +Generic+ is used. The class receives
# the components in the order +split+ returns them:
# scheme, userinfo, host, port, registry, path, opaque, query, fragment.
#
# Any error raised by +split+ propagates to the caller.
def self.parse(uri)
  components = self.split(uri)
  scheme = components.first
  key = scheme && scheme.upcase

  klass = (key && @@schemes.include?(key)) ? @@schemes[key] : Generic
  klass.new(*components)
end
regexp(schemes = nil)

Synopsis

URI::regexp([match_schemes])

Args

match_schemes

Array of schemes. If given, the resulting regexp matches only URIs whose scheme is one of the match_schemes.

Description

Returns a Regexp object which matches URI-like strings. The Regexp object returned by this method includes an arbitrary number of capture groups (parentheses). Never rely on their number.

Usage

require 'uri'
# extract first URI from html_string
html_string.slice(URI.regexp)
# remove ftp URIs
html_string.sub(URI.regexp(['ftp']), '')
# You should not rely on the number of parentheses
html_string.scan(URI.regexp) do |*matches|
  p $&
end
# File lib/uri/common.rb, line 593
# Returns a Regexp for locating URI-like substrings.
#
# schemes:: Optional list of scheme names. When omitted (or otherwise falsy),
#           the +ABS_URI_REF+ constant is returned as is.
#
# When +schemes+ is given, a new Regexp is built from <tt>PATTERN::X_ABS_URI</tt>
# prefixed with a lookahead requiring one of the schemes followed by ":" at
# the match position.
def self.regexp(schemes = nil)
  return ABS_URI_REF unless schemes

  /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/n
end
split(uri)

Synopsis

URI::split(uri)

Args

uri

String with URI.

Description

Splits the string into the following parts and returns an array with the result:

* Scheme
* Userinfo
* Host
* Port
* Registry
* Path
* Opaque
* Query
* Fragment

Usage

require 'uri'
p URI.split("http://www.ruby-lang.org/")
# => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
# File lib/uri/common.rb, line 380
# Breaks +uri+ into nine components and returns them as an Array in this
# order:
#
#   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
#
# Components that were not found are +nil+, except +path+, which becomes the
# empty String when neither a path nor an opaque part is present.
#
# Raises InvalidURIError when +uri+ matches +ABS_URI+ but yields no scheme,
# when it matches +ABS_URI+ but has none of opaque/path/host/registry, or when
# it is neither the empty String nor a match for +ABS_URI+ or +REL_URI+.
def self.split(uri)
  scheme = userinfo = host = port = registry = nil
  path = opaque = query = fragment = nil

  case uri
  when ''
    # Null URI: nothing to extract; path is defaulted further down.
  when ABS_URI
    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric
    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = $~.captures

    unless scheme
      raise InvalidURIError,
        "bad URI(absolute but no scheme): #{uri}"
    end
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad URI(absolute but no path): #{uri}"
    end
  when REL_URI
    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = $~.captures

    # A relative reference carries neither a scheme nor an opaque part.
    scheme = nil
    opaque = nil

    # The path is the leading relative segment followed by the absolute part,
    # or whichever of the two is present (nil when both are missing).
    path = if rel_segment && abs_path
             rel_segment + abs_path
           else
             rel_segment || abs_path
           end
  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  # (see RFC2396 Section 5.2)
  path = '' unless path || opaque

  [
    scheme,
    userinfo, host, port, registry, # authority pieces
    path, opaque,                   # path is only defaulted when opaque is absent
    query,
    fragment
  ]
end