Module: URI::REGEXP::PATTERN
- Defined in:
- lib/uri/common.rb
Overview
Patterns used to parse URIs.
Constant Summary
- ALPHA =
alpha = lowalpha | upalpha
"a-zA-Z"- ALNUM =
alphanum = alpha | digit
"#{ALPHA}\\d"- HEX =
hex = digit | “A” | “B” | “C” | “D” | “E” | “F” | “a” | “b” | “c” | “d” | “e” | “f”
"a-fA-F\\d"- ESCAPED =
escaped = “%” hex hex
"%[#{HEX}]{2}"- UNRESERVED =
mark = “-” | “_” | “.” | “!” | “~” | “*” | “'” | “(” | “)”
unreserved = alphanum | mark
"-_.!~*'()#{ALNUM}"- RESERVED =
reserved = “;” | “/” | “?” | “:” | “@” | “&” | “=” | “+” | “$” | “,” (RFC 2396)
reserved = “;” | “/” | “?” | “:” | “@” | “&” | “=” | “+” | “$” | “,” | “[” | “]” (RFC 2732)
";/?:@&=+$,\\[\\]"- URIC =
uric = reserved | unreserved | escaped
"(?:[#{UNRESERVED}#{RESERVED}]|#{ESCAPED})"- URIC_NO_SLASH =
uric_no_slash = unreserved | escaped | “;” | “?” | “:” | “@” | “&” | “=” | “+” | “$” | “,”
"(?:[#{UNRESERVED};?:@&=+$,]|#{ESCAPED})"- QUERY =
query = *uric
"#{URIC}*"- FRAGMENT =
fragment = *uric
"#{URIC}*"- DOMLABEL =
domainlabel = alphanum | alphanum *( alphanum | “-” ) alphanum
"(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"- TOPLABEL =
toplabel = alpha | alpha *( alphanum | “-” ) alphanum
"(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"- HOSTNAME =
hostname = *( domainlabel “.” ) toplabel [ “.” ]
"(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"- IPV4ADDR =
RFC 2373, Appendix B:
IPv6address = hexpart [ “:” IPv4address ]
IPv4address = 1*3DIGIT “.” 1*3DIGIT “.” 1*3DIGIT “.” 1*3DIGIT
hexpart = hexseq | hexseq “::” [ hexseq ] | “::” [ hexseq ]
hexseq = hex4 *( “:” hex4 )
hex4 = 1*4HEXDIG
XXX: The definition above has a flaw: “::” followed by an IPv4address must be allowed too. The constants below (IPV4ADDR through IPV6ADDR) implement a replacement grammar that permits it.
IPv4address = 1*3DIGIT “.” 1*3DIGIT “.” 1*3DIGIT “.” 1*3DIGIT
"\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"- HEX4 =
hex4 = 1*4HEXDIG
"[#{HEX}]{1,4}"- LASTPART =
lastpart = hex4 | IPv4address
"(?:#{HEX4}|#{IPV4ADDR})"- HEXSEQ1 =
hexseq1 = *( hex4 “:” ) hex4
"(?:#{HEX4}:)*#{HEX4}"- HEXSEQ2 =
hexseq2 = *( hex4 “:” ) lastpart
"(?:#{HEX4}:)*#{LASTPART}"- IPV6ADDR =
IPv6address = hexseq2 | [ hexseq1 ] “::” [ hexseq2 ]
"(?:#{HEXSEQ2}|(?:#{HEXSEQ1})?::(?:#{HEXSEQ2})?)"- IPV6REF =
ipv6reference = “[” IPv6address “]” (RFC 2732)
"\\[#{IPV6ADDR}\\]"- HOST =
host = hostname | IPv4address (RFC 2396)
host = hostname | IPv4address | IPv6reference (RFC 2732)
"(?:#{HOSTNAME}|#{IPV4ADDR}|#{IPV6REF})"- PORT =
port = *digit
'\d*'- HOSTPORT =
hostport = host [ “:” port ]
"#{HOST}(?::#{PORT})?"- USERINFO =
userinfo = *( unreserved | escaped | “;” | “:” | “&” | “=” | “+” | “$” | “,” )
"(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})*"- PCHAR =
pchar = unreserved | escaped | “:” | “@” | “&” | “=” | “+” | “$” | “,”
"(?:[#{UNRESERVED}:@&=+$,]|#{ESCAPED})"- PARAM =
param = *pchar
"#{PCHAR}*"- SEGMENT =
segment = *pchar *( “;” param )
"#{PCHAR}*(?:;#{PARAM})*"- PATH_SEGMENTS =
path_segments = segment *( “/” segment )
"#{SEGMENT}(?:/#{SEGMENT})*"- SERVER =
server = [ [ userinfo “@” ] hostport ]
"(?:#{USERINFO}@)?#{HOSTPORT}"- REG_NAME =
reg_name = 1*( unreserved | escaped | “$” | “,” | “;” | “:” | “@” | “&” | “=” | “+” )
"(?:[#{UNRESERVED}$,;:@&=+]|#{ESCAPED})+"- AUTHORITY =
authority = server | reg_name
"(?:#{SERVER}|#{REG_NAME})"- REL_SEGMENT =
rel_segment = 1*( unreserved | escaped | “;” | “@” | “&” | “=” | “+” | “$” | “,” )
"(?:[#{UNRESERVED};@&=+$,]|#{ESCAPED})+"- SCHEME =
scheme = alpha *( alpha | digit | “+” | “-” | “.” )
"[#{ALPHA}][-+.#{ALPHA}\\d]*"- ABS_PATH =
abs_path = “/” path_segments
"/#{PATH_SEGMENTS}"- REL_PATH =
rel_path = rel_segment [ abs_path ]
"#{REL_SEGMENT}(?:#{ABS_PATH})?"- NET_PATH =
net_path = “//” authority [ abs_path ]
"//#{AUTHORITY}(?:#{ABS_PATH})?"- HIER_PART =
hier_part = ( net_path | abs_path ) [ “?” query ]
"(?:#{NET_PATH}|#{ABS_PATH})(?:\\?(?:#{QUERY}))?"- OPAQUE_PART =
opaque_part = uric_no_slash *uric
"#{URIC_NO_SLASH}#{URIC}*"- ABS_URI =
absoluteURI = scheme “:” ( hier_part | opaque_part )
"#{SCHEME}:(?:#{HIER_PART}|#{OPAQUE_PART})"- REL_URI =
relativeURI = ( net_path | abs_path | rel_path ) [ “?” query ]
"(?:#{NET_PATH}|#{ABS_PATH}|#{REL_PATH})(?:\\?#{QUERY})?"- URI_REF =
URI-reference = [ absoluteURI | relativeURI ] [ “#” fragment ]
"(?:#{ABS_URI}|#{REL_URI})?(?:##{FRAGMENT})?"- X_ABS_URI =
XXX:
" (#{PATTERN::SCHEME}): (?# 1: scheme) (?: (#{PATTERN::OPAQUE_PART}) (?# 2: opaque) | (?:(?: //(?: (?:(?:(#{PATTERN::USERINFO})@)? (?# 3: userinfo) (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port) | (#{PATTERN::REG_NAME}) (?# 6: registry) ) | (?!//)) (?# XXX: '//' is the mark for hostport) (#{PATTERN::ABS_PATH})? (?# 7: path) )(?:\\?(#{PATTERN::QUERY}))? (?# 8: query) ) (?:\\#(#{PATTERN::FRAGMENT}))? (?# 9: fragment) "- X_REL_URI =
" (?: (?: // (?: (?:(#{PATTERN::USERINFO})@)? (?# 1: userinfo) (#{PATTERN::HOST})?(?::(\\d*))? (?# 2: host, 3: port) | (#{PATTERN::REG_NAME}) (?# 4: registry) ) ) | (#{PATTERN::REL_SEGMENT}) (?# 5: rel_segment) )? (#{PATTERN::ABS_PATH})? (?# 6: abs_path) (?:\\?(#{PATTERN::QUERY}))? (?# 7: query) (?:\\#(#{PATTERN::FRAGMENT}))? (?# 8: fragment) "